Revert r345165 "[X86] Bring back the MOV64r0 pseudo instruction"

Google is reporting regressions on some benchmarks.

llvm-svn: 345785
parent 7045c72b95
commit 6c3f1692c8
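For context, the revert switches 64-bit zero materialization back from a dedicated MOV64r0 pseudo to the existing MOV32r0 pseudo wrapped in SUBREG_TO_REG. A hedged sketch of the two idioms involved (illustration only, not part of the patch; register choice is arbitrary):

    xorl %eax, %eax    # 32-bit xor zero idiom; writing EAX implicitly zeroes the upper half of RAX
    movq $0, %rax      # explicit 64-bit immediate move, as seen in the updated GlobalISel test below

The first form is what the MOV32r0-based patterns expand to after register allocation.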
@@ -1916,8 +1916,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
{ &X86::GR64RegClass, X86::RAX, X86::RDX, {
{ X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
{ X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
{ X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv
{ X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem
{ X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
{ X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
}
}, // i64
};
@@ -1964,22 +1964,26 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(OpEntry.OpSignExtend));
else {
unsigned ZeroReg = createResultReg(VT == MVT::i64 ? &X86::GR64RegClass
: &X86::GR32RegClass);
unsigned Zero32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(OpEntry.OpSignExtend), ZeroReg);
TII.get(X86::MOV32r0), Zero32);

// Copy the zero into the appropriate sub/super/identical physical
// register. Unfortunately the operations needed are not uniform enough
// to fit neatly into the table above.
if (VT == MVT::i16)
if (VT == MVT::i16) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(ZeroReg, 0, X86::sub_16bit);
else
.addReg(Zero32, 0, X86::sub_16bit);
} else if (VT == MVT::i32) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(ZeroReg);
.addReg(Zero32);
} else if (VT == MVT::i64) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
.addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
}
}
}
// Generate the DIV/IDIV instruction.
@@ -3704,9 +3708,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {

uint64_t Imm = CI->getZExtValue();
if (Imm == 0) {
if (VT.SimpleTy == MVT::i64)
return fastEmitInst_(X86::MOV64r0, &X86::GR64RegClass);

unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
@@ -3719,6 +3720,13 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
X86::sub_16bit);
case MVT::i32:
return SrcReg;
case MVT::i64: {
unsigned ResultReg = createResultReg(&X86::GR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
.addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
return ResultReg;
}
}
}
@@ -3591,10 +3591,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
unsigned ClrOpc = NVT.SimpleTy == MVT::i64 ? X86::MOV64r0
: X86::MOV32r0;
MVT ClrVT = NVT.SimpleTy == MVT::i64 ? MVT::i64 : MVT::i32;
SDValue ClrNode = SDValue(CurDAG->getMachineNode(ClrOpc, dl, ClrVT), 0);
SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
@@ -3605,7 +3602,15 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
0);
break;
case MVT::i32:
break;
case MVT::i64:
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
CurDAG->getTargetConstant(X86::sub_32bit, dl,
MVT::i32)),
0);
break;
default:
llvm_unreachable("Unexpected division source");
@@ -270,18 +270,16 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>;
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isPseudo = 1, AddedComplexity = 10 in {
isPseudo = 1, AddedComplexity = 10 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)]>, Sched<[WriteZero]>;
def MOV64r0 : I<0, Pseudo, (outs GR64:$dst), (ins), "",
[(set GR64:$dst, 0)]>, Sched<[WriteZero]>;
}

// Other widths can also make use of the 32-bit xor, which may have a smaller
// encoding and avoid partial register updates.
let AddedComplexity = 10 in {
def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
}

let Predicates = [OptForSize, Not64BitMode],
@@ -683,10 +683,8 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
// The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
// effects.
unsigned NewOpc = X86::MOV32ri;
int Value;
switch (Orig.getOpcode()) {
case X86::MOV64r0: NewOpc = X86::MOV32ri64; Value = 0; break;
case X86::MOV32r0: Value = 0; break;
case X86::MOV32r1: Value = 1; break;
case X86::MOV32r_1: Value = -1; break;
@@ -695,7 +693,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
}

const DebugLoc &DL = Orig.getDebugLoc();
BuildMI(MBB, I, DL, get(NewOpc))
BuildMI(MBB, I, DL, get(X86::MOV32ri))
.add(Orig.getOperand(0))
.addImm(Value);
} else {
@@ -3752,9 +3750,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// They are safe to move up, if the definition to EFLAGS is dead and
// earlier instructions do not read or write EFLAGS.
if (!Movr0Inst &&
(Instr.getOpcode() == X86::MOV32r0 ||
Instr.getOpcode() == X86::MOV64r0) &&
if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
Movr0Inst = &Instr;
continue;
@@ -4159,15 +4155,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));
case X86::MOV64r0: {
const TargetRegisterInfo *TRI = &getRegisterInfo();
unsigned Reg = MIB->getOperand(0).getReg();
unsigned Reg32 = TRI->getSubReg(Reg, X86::sub_32bit);
MIB->getOperand(0).setReg(Reg32);
Expand2AddrUndef(MIB, get(X86::XOR32rr));
MIB.addReg(Reg, RegState::ImplicitDefine);
return true;
}
case X86::MOV32r1:
return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
case X86::MOV32r_1:
@@ -4911,10 +4898,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
isTwoAddrFold = true;
} else {
if (OpNum == 0) {
if (MI.getOpcode() == X86::MOV32r0 || MI.getOpcode() == X86::MOV64r0) {
unsigned NewOpc = MI.getOpcode() == X86::MOV64r0 ? X86::MOV64mi32
: X86::MOV32mi;
NewMI = MakeM0Inst(*this, NewOpc, MOs, InsertPt, MI);
if (MI.getOpcode() == X86::MOV32r0) {
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
if (NewMI)
return NewMI;
}
@@ -487,14 +487,20 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
// Otherwise, just build the predicate state itself by zeroing a register
// as we don't need any initial state.
PS->InitialReg = MRI->createVirtualRegister(PS->RC);
auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64r0),
PS->InitialReg);
unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
PredStateSubReg);
++NumInstsInserted;
MachineOperand *ZeroEFLAGSDefOp =
ZeroI->findRegisterDefOperand(X86::EFLAGS);
assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
"Must have an implicit def of EFLAGS!");
ZeroEFLAGSDefOp->setIsDead(true);
BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
PS->InitialReg)
.addImm(0)
.addReg(PredStateSubReg)
.addImm(X86::sub_32bit);
}

// We're going to need to trace predicate state throughout the function's
@@ -54,7 +54,7 @@ define i64 @const_i64_i32() {
define void @main(i32 ** %data) {
; ALL-LABEL: main:
; ALL: # %bb.0:
; ALL-NEXT: xorl %eax, %eax
; ALL-NEXT: movq $0, %rax
; ALL-NEXT: movq %rax, (%rdi)
; ALL-NEXT: retq
store i32* null, i32** %data, align 8
@ -2141,7 +2141,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: pushq %r13
|
||||
; AVX1-NEXT: pushq %r12
|
||||
; AVX1-NEXT: pushq %rbx
|
||||
; AVX1-NEXT: subq $16, %rsp
|
||||
; AVX1-NEXT: subq $24, %rsp
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
|
@ -2152,12 +2152,12 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: vmovq %xmm5, %rbp
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm4, %rcx
|
||||
; AVX1-NEXT: vmovq %xmm4, %rsi
|
||||
; AVX1-NEXT: vpextrq $1, %xmm4, %rsi
|
||||
; AVX1-NEXT: vmovq %xmm4, %rcx
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm4, %r10
|
||||
; AVX1-NEXT: vpextrq $1, %xmm4, %r8
|
||||
; AVX1-NEXT: vmovq %xmm4, %r11
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
|
||||
|
@ -2166,7 +2166,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm4, %r15
|
||||
; AVX1-NEXT: vmovq %xmm4, %rdx
|
||||
; AVX1-NEXT: vmovq %xmm4, %rdi
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
|
@ -2175,28 +2175,27 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX1-NEXT: vmovq %xmm3, %r9
|
||||
; AVX1-NEXT: vmovq %xmm3, %r10
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm4, %r14
|
||||
; AVX1-NEXT: addq %rbx, %r14
|
||||
; AVX1-NEXT: vmovq %xmm4, %r8
|
||||
; AVX1-NEXT: addq %rbp, %r8
|
||||
; AVX1-NEXT: vpextrq $1, %xmm4, %rdx
|
||||
; AVX1-NEXT: addq %rbx, %rdx
|
||||
; AVX1-NEXT: vmovq %xmm4, %r9
|
||||
; AVX1-NEXT: addq %rbp, %r9
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm3, %rdi
|
||||
; AVX1-NEXT: addq %rcx, %rdi
|
||||
; AVX1-NEXT: vmovq %xmm3, %rax
|
||||
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX1-NEXT: addq %rsi, %rax
|
||||
; AVX1-NEXT: movq %rax, %rsi
|
||||
; AVX1-NEXT: movq %rax, %r14
|
||||
; AVX1-NEXT: vmovq %xmm3, %rbp
|
||||
; AVX1-NEXT: addq %rcx, %rbp
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX1-NEXT: addq %r10, %rax
|
||||
; AVX1-NEXT: movq %rax, %r10
|
||||
; AVX1-NEXT: vpextrq $1, %xmm3, %rsi
|
||||
; AVX1-NEXT: addq %r8, %rsi
|
||||
; AVX1-NEXT: vmovq %xmm3, %rax
|
||||
; AVX1-NEXT: addq %r11, %rax
|
||||
; AVX1-NEXT: movq %rax, %r11
|
||||
|
@ -2204,17 +2203,17 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; AVX1-NEXT: addq %r13, %rax
|
||||
; AVX1-NEXT: movq %rax, %rbx
|
||||
; AVX1-NEXT: movq %rax, %rcx
|
||||
; AVX1-NEXT: vmovq %xmm2, %rax
|
||||
; AVX1-NEXT: addq %r12, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movq %rax, %r8
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX1-NEXT: addq %r15, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movq %rax, %rbx
|
||||
; AVX1-NEXT: vmovq %xmm3, %rax
|
||||
; AVX1-NEXT: addq %rdx, %rax
|
||||
; AVX1-NEXT: addq %rdi, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
|
@ -2227,30 +2226,36 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm2, %rbp
|
||||
; AVX1-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
|
||||
; AVX1-NEXT: vmovq %xmm2, %r15
|
||||
; AVX1-NEXT: addq %r9, %r15
|
||||
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; AVX1-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: vmovq %xmm2, %r12
|
||||
; AVX1-NEXT: addq %r10, %r12
|
||||
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %r9
|
||||
; AVX1-NEXT: addq %rax, %r9
|
||||
; AVX1-NEXT: vmovq %xmm1, %rcx
|
||||
; AVX1-NEXT: vmovq %xmm0, %rdx
|
||||
; AVX1-NEXT: addq %rcx, %rdx
|
||||
; AVX1-NEXT: addq $-1, %r14
|
||||
; AVX1-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %r10
|
||||
; AVX1-NEXT: addq %rax, %r10
|
||||
; AVX1-NEXT: vmovq %xmm1, %rax
|
||||
; AVX1-NEXT: vmovq %xmm0, %rdi
|
||||
; AVX1-NEXT: addq %rax, %rdi
|
||||
; AVX1-NEXT: addq $-1, %rdx
|
||||
; AVX1-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %r8
|
||||
; AVX1-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %r9
|
||||
; AVX1-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, (%rsp) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %rdi
|
||||
; AVX1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %r14
|
||||
; AVX1-NEXT: movq %r14, (%rsp) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %rbp
|
||||
; AVX1-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
|
@ -2259,100 +2264,98 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %r10
|
||||
; AVX1-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %r11
|
||||
; AVX1-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: addq $-1, %rcx
|
||||
; AVX1-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %ebp
|
||||
; AVX1-NEXT: adcq $-1, %rbp
|
||||
; AVX1-NEXT: addq $-1, %r8
|
||||
; AVX1-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %r15d
|
||||
; AVX1-NEXT: adcq $-1, %r15
|
||||
; AVX1-NEXT: addq $-1, %rbx
|
||||
; AVX1-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: movq %rax, %rsi
|
||||
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX1-NEXT: movl $0, %r12d
|
||||
; AVX1-NEXT: adcq $-1, %r12
|
||||
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX1-NEXT: movl $0, %r13d
|
||||
; AVX1-NEXT: adcq $-1, %r13
|
||||
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX1-NEXT: movl $0, %r14d
|
||||
; AVX1-NEXT: adcq $-1, %r14
|
||||
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX1-NEXT: movl $0, %ebx
|
||||
; AVX1-NEXT: adcq $-1, %rbx
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX1-NEXT: addq $-1, %rcx
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX1-NEXT: addq $-1, %rdx
|
||||
; AVX1-NEXT: movl $0, %r11d
|
||||
; AVX1-NEXT: adcq $-1, %r11
|
||||
; AVX1-NEXT: addq $-1, %rbp
|
||||
; AVX1-NEXT: movl $0, %r10d
|
||||
; AVX1-NEXT: adcq $-1, %r10
|
||||
; AVX1-NEXT: addq $-1, %r15
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: addq $-1, %rax
|
||||
; AVX1-NEXT: movl $0, %ebx
|
||||
; AVX1-NEXT: adcq $-1, %rbx
|
||||
; AVX1-NEXT: addq $-1, %r12
|
||||
; AVX1-NEXT: movl $0, %r9d
|
||||
; AVX1-NEXT: adcq $-1, %r9
|
||||
; AVX1-NEXT: addq $-1, %r10
|
||||
; AVX1-NEXT: movl $0, %r8d
|
||||
; AVX1-NEXT: adcq $-1, %r8
|
||||
; AVX1-NEXT: addq $-1, %r9
|
||||
; AVX1-NEXT: movl $0, %edi
|
||||
; AVX1-NEXT: adcq $-1, %rdi
|
||||
; AVX1-NEXT: addq $-1, %rdx
|
||||
; AVX1-NEXT: movl $0, %eax
|
||||
; AVX1-NEXT: adcq $-1, %rax
|
||||
; AVX1-NEXT: shldq $63, %rdx, %rax
|
||||
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: shldq $63, %r9, %rdi
|
||||
; AVX1-NEXT: shldq $63, %r15, %r8
|
||||
; AVX1-NEXT: shldq $63, %rbp, %r10
|
||||
; AVX1-NEXT: shldq $63, %rcx, %r11
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rdx, %rbx
|
||||
; AVX1-NEXT: addq $-1, %rdi
|
||||
; AVX1-NEXT: movl $0, %ecx
|
||||
; AVX1-NEXT: adcq $-1, %rcx
|
||||
; AVX1-NEXT: shldq $63, %rdi, %rcx
|
||||
; AVX1-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: shldq $63, %r10, %r8
|
||||
; AVX1-NEXT: shldq $63, %r12, %r9
|
||||
; AVX1-NEXT: shldq $63, %rax, %rbx
|
||||
; AVX1-NEXT: shldq $63, %rdx, %r11
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rdx, %r14
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rdx, %r13
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rdx, %r12
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %rsi
|
||||
; AVX1-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %rsi
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %r15
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %rbp
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %rsi
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rcx, %rax
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rcx, %r9
|
||||
; AVX1-NEXT: movq (%rsp), %rcx # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %rcx
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %rdi
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
|
||||
; AVX1-NEXT: movq (%rsp), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %r12
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rax, %r10
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rdx, %rcx
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rbp, %rdx
|
||||
; AVX1-NEXT: vmovq %rdx, %xmm8
|
||||
; AVX1-NEXT: vmovq %rcx, %xmm0
|
||||
; AVX1-NEXT: vmovq %r9, %xmm1
|
||||
; AVX1-NEXT: vmovq %rax, %xmm11
|
||||
; AVX1-NEXT: vmovq %r15, %xmm2
|
||||
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX1-NEXT: shldq $63, %rdx, %rax
|
||||
; AVX1-NEXT: vmovq %rax, %xmm8
|
||||
; AVX1-NEXT: vmovq %r10, %xmm0
|
||||
; AVX1-NEXT: vmovq %r12, %xmm1
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm11
|
||||
; AVX1-NEXT: vmovq %rcx, %xmm2
|
||||
; AVX1-NEXT: vmovq %rsi, %xmm13
|
||||
; AVX1-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 8-byte Folded Reload
|
||||
; AVX1-NEXT: # xmm14 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq %r12, %xmm15
|
||||
; AVX1-NEXT: vmovq %r13, %xmm9
|
||||
; AVX1-NEXT: vmovq %r14, %xmm10
|
||||
; AVX1-NEXT: vmovq %rbx, %xmm12
|
||||
; AVX1-NEXT: vmovq %rbp, %xmm14
|
||||
; AVX1-NEXT: vmovq %r15, %xmm15
|
||||
; AVX1-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 8-byte Folded Reload
|
||||
; AVX1-NEXT: # xmm9 = mem[0],zero
|
||||
; AVX1-NEXT: vmovq %r13, %xmm10
|
||||
; AVX1-NEXT: vmovq %r14, %xmm12
|
||||
; AVX1-NEXT: vmovq %r11, %xmm3
|
||||
; AVX1-NEXT: vmovq %r10, %xmm4
|
||||
; AVX1-NEXT: vmovq %r8, %xmm5
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm6
|
||||
; AVX1-NEXT: vmovq %rbx, %xmm4
|
||||
; AVX1-NEXT: vmovq %r9, %xmm5
|
||||
; AVX1-NEXT: vmovq %r8, %xmm6
|
||||
; AVX1-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 8-byte Folded Reload
|
||||
; AVX1-NEXT: # xmm7 = mem[0],zero
|
||||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm0[0],xmm8[0]
|
||||
|
@ -2379,7 +2382,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
|
||||
; AVX1-NEXT: addq $16, %rsp
|
||||
; AVX1-NEXT: addq $24, %rsp
|
||||
; AVX1-NEXT: popq %rbx
|
||||
; AVX1-NEXT: popq %r12
|
||||
; AVX1-NEXT: popq %r13
|
||||
|
@ -2404,15 +2407,15 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
|
||||
; AVX2-NEXT: vpextrq $1, %xmm4, %rbx
|
||||
; AVX2-NEXT: vmovq %xmm4, %rdx
|
||||
; AVX2-NEXT: vmovq %xmm4, %rbp
|
||||
; AVX2-NEXT: vpextrq $1, %xmm3, %rdi
|
||||
; AVX2-NEXT: vmovq %xmm3, %rcx
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX2-NEXT: vpextrq $1, %xmm3, %r9
|
||||
; AVX2-NEXT: vmovq %xmm3, %r10
|
||||
; AVX2-NEXT: vpextrq $1, %xmm2, %r13
|
||||
; AVX2-NEXT: vpextrq $1, %xmm3, %rdx
|
||||
; AVX2-NEXT: vmovq %xmm3, %r9
|
||||
; AVX2-NEXT: vpextrq $1, %xmm2, %r11
|
||||
; AVX2-NEXT: vmovq %xmm2, %r12
|
||||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
|
@ -2430,26 +2433,26 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
|
||||
; AVX2-NEXT: vpextrq $1, %xmm4, %rbp
|
||||
; AVX2-NEXT: addq %rbx, %rbp
|
||||
; AVX2-NEXT: vmovq %xmm4, %rax
|
||||
; AVX2-NEXT: addq %rdx, %rax
|
||||
; AVX2-NEXT: movq %rax, %r11
|
||||
; AVX2-NEXT: vpextrq $1, %xmm3, %r8
|
||||
; AVX2-NEXT: addq %rdi, %r8
|
||||
; AVX2-NEXT: vpextrq $1, %xmm4, %rax
|
||||
; AVX2-NEXT: addq %rbx, %rax
|
||||
; AVX2-NEXT: movq %rax, %rbx
|
||||
; AVX2-NEXT: vmovq %xmm4, %r13
|
||||
; AVX2-NEXT: addq %rbp, %r13
|
||||
; AVX2-NEXT: vpextrq $1, %xmm3, %r10
|
||||
; AVX2-NEXT: addq %rdi, %r10
|
||||
; AVX2-NEXT: vmovq %xmm3, %r14
|
||||
; AVX2-NEXT: addq %rcx, %r14
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX2-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX2-NEXT: addq %r9, %rax
|
||||
; AVX2-NEXT: movq %rax, %rbx
|
||||
; AVX2-NEXT: vmovq %xmm3, %rax
|
||||
; AVX2-NEXT: addq %r10, %rax
|
||||
; AVX2-NEXT: movq %rax, %r10
|
||||
; AVX2-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; AVX2-NEXT: addq %r13, %rcx
|
||||
; AVX2-NEXT: addq %rdx, %rax
|
||||
; AVX2-NEXT: movq %rax, %rcx
|
||||
; AVX2-NEXT: vmovq %xmm3, %r8
|
||||
; AVX2-NEXT: addq %r9, %r8
|
||||
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; AVX2-NEXT: addq %r11, %rax
|
||||
; AVX2-NEXT: movq %rax, %r11
|
||||
; AVX2-NEXT: vmovq %xmm2, %rax
|
||||
; AVX2-NEXT: addq %r12, %rax
|
||||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
|
@ -2471,8 +2474,8 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX2-NEXT: vpextrq $1, %xmm2, %r12
|
||||
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
|
||||
; AVX2-NEXT: vpextrq $1, %xmm2, %rbp
|
||||
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
|
||||
; AVX2-NEXT: vmovq %xmm2, %r9
|
||||
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||
|
@ -2481,36 +2484,36 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX2-NEXT: vmovq %xmm0, %rsi
|
||||
; AVX2-NEXT: addq %rdx, %rsi
|
||||
; AVX2-NEXT: addq $-1, %rbp
|
||||
; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %eax
|
||||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, %r11
|
||||
; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %eax
|
||||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, (%rsp) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, %r8
|
||||
; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %eax
|
||||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, %r14
|
||||
; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %ebp
|
||||
; AVX2-NEXT: adcq $-1, %rbp
|
||||
; AVX2-NEXT: addq $-1, %rbx
|
||||
; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %eax
|
||||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, %r13
|
||||
; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %eax
|
||||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, (%rsp) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, %r10
|
||||
; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %r15d
|
||||
; AVX2-NEXT: adcq $-1, %r15
|
||||
; AVX2-NEXT: movl $0, %eax
|
||||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, %r14
|
||||
; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %r13d
|
||||
; AVX2-NEXT: adcq $-1, %r13
|
||||
; AVX2-NEXT: addq $-1, %rcx
|
||||
; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %eax
|
||||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, %r8
|
||||
; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %r15d
|
||||
; AVX2-NEXT: adcq $-1, %r15
|
||||
; AVX2-NEXT: addq $-1, %r11
|
||||
; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: movl $0, %ebx
|
||||
; AVX2-NEXT: adcq $-1, %rbx
|
||||
; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
|
@ -2525,13 +2528,13 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: adcq $-1, %rax
|
||||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX2-NEXT: movl $0, %r13d
|
||||
; AVX2-NEXT: adcq $-1, %r13
|
||||
; AVX2-NEXT: movl $0, %r12d
|
||||
; AVX2-NEXT: adcq $-1, %r12
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX2-NEXT: addq $-1, %rcx
|
||||
; AVX2-NEXT: movl $0, %r11d
|
||||
; AVX2-NEXT: adcq $-1, %r11
|
||||
; AVX2-NEXT: addq $-1, %r12
|
||||
; AVX2-NEXT: addq $-1, %rbp
|
||||
; AVX2-NEXT: movl $0, %r14d
|
||||
; AVX2-NEXT: adcq $-1, %r14
|
||||
; AVX2-NEXT: addq $-1, %r9
|
||||
|
@ -2547,10 +2550,10 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: shldq $63, %rdi, %rdx
|
||||
; AVX2-NEXT: shldq $63, %r9, %r10
|
||||
; AVX2-NEXT: shldq $63, %r12, %r14
|
||||
; AVX2-NEXT: shldq $63, %rbp, %r14
|
||||
; AVX2-NEXT: shldq $63, %rcx, %r11
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX2-NEXT: shldq $63, %rcx, %r13
|
||||
; AVX2-NEXT: shldq $63, %rcx, %r12
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX2-NEXT: shldq $63, %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
|
@ -2566,10 +2569,10 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX2-NEXT: shldq $63, %rcx, %rax
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX2-NEXT: shldq $63, %rcx, %rbp
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
|
||||
; AVX2-NEXT: shldq $63, %rcx, %r13
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX2-NEXT: shldq $63, %rcx, %r12
|
||||
; AVX2-NEXT: shldq $63, %rcx, %rbp
|
||||
; AVX2-NEXT: movq (%rsp), %rdi # 8-byte Reload
|
||||
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX2-NEXT: shldq $63, %rcx, %rdi
|
||||
|
@ -2578,8 +2581,8 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: shldq $63, %rcx, %rsi
|
||||
; AVX2-NEXT: vmovq %rsi, %xmm8
|
||||
; AVX2-NEXT: vmovq %rdi, %xmm9
|
||||
; AVX2-NEXT: vmovq %r12, %xmm10
|
||||
; AVX2-NEXT: vmovq %rbp, %xmm11
|
||||
; AVX2-NEXT: vmovq %rbp, %xmm10
|
||||
; AVX2-NEXT: vmovq %r13, %xmm11
|
||||
; AVX2-NEXT: vmovq %rax, %xmm12
|
||||
; AVX2-NEXT: vmovq %r15, %xmm13
|
||||
; AVX2-NEXT: vmovq %rbx, %xmm14
|
||||
|
@ -2587,7 +2590,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX2-NEXT: vmovq %r9, %xmm0
|
||||
; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
|
||||
; AVX2-NEXT: # xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vmovq %r13, %xmm2
|
||||
; AVX2-NEXT: vmovq %r12, %xmm2
|
||||
; AVX2-NEXT: vmovq %r11, %xmm3
|
||||
; AVX2-NEXT: vmovq %r14, %xmm4
|
||||
; AVX2-NEXT: vmovq %r10, %xmm5
|
||||
|
@ -2644,7 +2647,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX512-NEXT: pushq %r13
|
||||
; AVX512-NEXT: pushq %r12
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: subq $16, %rsp
|
||||
; AVX512-NEXT: subq $24, %rsp
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
|
@ -2657,8 +2660,8 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX512-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
|
||||
; AVX512-NEXT: vmovq %xmm3, %r10
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
|
||||
; AVX512-NEXT: vmovq %xmm3, %r8
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %r13
|
||||
; AVX512-NEXT: vmovq %xmm2, %r12
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
|
@ -2666,7 +2669,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %r15
|
||||
; AVX512-NEXT: vmovq %xmm3, %r14
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %r9
|
||||
; AVX512-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
|
@ -2678,34 +2681,35 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm4
|
||||
; AVX512-NEXT: vpextrq $1, %xmm4, %r11
|
||||
; AVX512-NEXT: addq %rbx, %r11
|
||||
; AVX512-NEXT: vpextrq $1, %xmm4, %rax
|
||||
; AVX512-NEXT: addq %rbx, %rax
|
||||
; AVX512-NEXT: movq %rax, %rbx
|
||||
; AVX512-NEXT: vmovq %xmm4, %rax
|
||||
; AVX512-NEXT: addq %rbp, %rax
|
||||
; AVX512-NEXT: movq %rax, %rbx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %r8
|
||||
; AVX512-NEXT: addq %rdi, %r8
|
||||
; AVX512-NEXT: vmovq %xmm3, %r15
|
||||
; AVX512-NEXT: addq %rsi, %r15
|
||||
; AVX512-NEXT: movq %rax, %rbp
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX512-NEXT: addq %rdi, %rax
|
||||
; AVX512-NEXT: movq %rax, %rdi
|
||||
; AVX512-NEXT: vmovq %xmm3, %r10
|
||||
; AVX512-NEXT: addq %rsi, %r10
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rdi
|
||||
; AVX512-NEXT: addq %rcx, %rdi
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
|
||||
; AVX512-NEXT: addq %rdx, %rcx
|
||||
; AVX512-NEXT: vmovq %xmm3, %rax
|
||||
; AVX512-NEXT: addq %r10, %rax
|
||||
; AVX512-NEXT: movq %rax, %r10
|
||||
; AVX512-NEXT: addq %r8, %rax
|
||||
; AVX512-NEXT: movq %rax, %r8
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %rsi
|
||||
; AVX512-NEXT: addq %r13, %rsi
|
||||
; AVX512-NEXT: vmovq %xmm2, %rax
|
||||
; AVX512-NEXT: addq %r12, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: vmovq %xmm2, %r11
|
||||
; AVX512-NEXT: addq %r12, %r11
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; AVX512-NEXT: addq %rdx, %rax
|
||||
; AVX512-NEXT: addq %r15, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: vmovq %xmm3, %rax
|
||||
; AVX512-NEXT: addq %r14, %rax
|
||||
|
@ -2718,33 +2722,24 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %rbp
|
||||
; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
|
||||
; AVX512-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: vmovq %xmm2, %r14
|
||||
; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %r9
|
||||
; AVX512-NEXT: addq %rax, %r9
|
||||
; AVX512-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512-NEXT: addq %rcx, %rdx
|
||||
; AVX512-NEXT: addq $-1, %r11
|
||||
; AVX512-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: addq %rax, %rdx
|
||||
; AVX512-NEXT: addq $-1, %rbx
|
||||
; AVX512-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, (%rsp) # 8-byte Spill
|
||||
; AVX512-NEXT: addq $-1, %r8
|
||||
; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: addq $-1, %r15
|
||||
; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: addq $-1, %rbp
|
||||
; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
|
@ -2752,94 +2747,108 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movq %rax, (%rsp) # 8-byte Spill
|
||||
; AVX512-NEXT: addq $-1, %r10
|
||||
; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, %rcx
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: addq $-1, %rcx
|
||||
; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: addq $-1, %r8
|
||||
; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: addq $-1, %rsi
|
||||
; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %r13d
|
||||
; AVX512-NEXT: adcq $-1, %r13
|
||||
; AVX512-NEXT: addq $-1, %r11
|
||||
; AVX512-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movl $0, %r15d
|
||||
; AVX512-NEXT: adcq $-1, %r15
|
||||
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: movq %rax, %rsi
|
||||
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX512-NEXT: movl $0, %r12d
|
||||
; AVX512-NEXT: adcq $-1, %r12
|
||||
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX512-NEXT: movl $0, %ebx
|
||||
; AVX512-NEXT: adcq $-1, %rbx
|
||||
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX512-NEXT: movl $0, %r13d
|
||||
; AVX512-NEXT: adcq $-1, %r13
|
||||
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX512-NEXT: movl $0, %r15d
|
||||
; AVX512-NEXT: adcq $-1, %r15
|
||||
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
|
||||
; AVX512-NEXT: addq $-1, %rbp
|
||||
; AVX512-NEXT: movl $0, %r11d
|
||||
; AVX512-NEXT: adcq $-1, %r11
|
||||
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
|
||||
; AVX512-NEXT: movl $0, %r8d
|
||||
; AVX512-NEXT: adcq $-1, %r8
|
||||
; AVX512-NEXT: addq $-1, %rbp
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: addq $-1, %rax
|
||||
; AVX512-NEXT: movl $0, %r10d
|
||||
; AVX512-NEXT: adcq $-1, %r10
|
||||
; AVX512-NEXT: addq $-1, %r14
|
||||
; AVX512-NEXT: movl $0, %r8d
|
||||
; AVX512-NEXT: adcq $-1, %r8
|
||||
; AVX512-NEXT: addq $-1, %r9
|
||||
; AVX512-NEXT: movl $0, %edi
|
||||
; AVX512-NEXT: adcq $-1, %rdi
|
||||
; AVX512-NEXT: addq $-1, %r9
|
||||
; AVX512-NEXT: movl $0, %esi
|
||||
; AVX512-NEXT: adcq $-1, %rsi
|
||||
; AVX512-NEXT: addq $-1, %rdx
|
||||
; AVX512-NEXT: movl $0, %eax
|
||||
; AVX512-NEXT: adcq $-1, %rax
|
||||
; AVX512-NEXT: shldq $63, %rdx, %rax
|
||||
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: shldq $63, %r9, %rsi
|
||||
; AVX512-NEXT: shldq $63, %r14, %rdi
|
||||
; AVX512-NEXT: shldq $63, %rbp, %r10
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %r8
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %r11
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %r15
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %r13
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rax, %rbx
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rax, %r12
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rax, %rcx
|
||||
; AVX512-NEXT: movl $0, %ecx
|
||||
; AVX512-NEXT: adcq $-1, %rcx
|
||||
; AVX512-NEXT: shldq $63, %rdx, %rcx
|
||||
; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: shldq $63, %r9, %rdi
|
||||
; AVX512-NEXT: shldq $63, %r14, %r8
|
||||
; AVX512-NEXT: shldq $63, %rax, %r10
|
||||
; AVX512-NEXT: shldq $63, %rbp, %r11
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %rbx
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %r12
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %rsi
|
||||
; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rax, %r15
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rax, %r13
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rax, %rsi
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rax, %rcx
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %rax
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
|
||||
; AVX512-NEXT: movq (%rsp), %r14 # 8-byte Reload
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %r14
|
||||
; AVX512-NEXT: movq (%rsp), %r9 # 8-byte Reload
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rdx, %r9
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
|
||||
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
|
||||
; AVX512-NEXT: shldq $63, %rbp, %rdx
|
||||
; AVX512-NEXT: vmovq %rdx, %xmm8
|
||||
; AVX512-NEXT: shldq $63, %rdx, %rbp
|
||||
; AVX512-NEXT: vmovq %rbp, %xmm8
|
||||
; AVX512-NEXT: vmovq %r9, %xmm9
|
||||
; AVX512-NEXT: vmovq %r14, %xmm10
|
||||
; AVX512-NEXT: vmovq %rax, %xmm11
|
||||
; AVX512-NEXT: vmovq %rcx, %xmm12
|
||||
; AVX512-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 8-byte Folded Reload
|
||||
; AVX512-NEXT: # xmm13 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq %r12, %xmm14
|
||||
; AVX512-NEXT: vmovq %rbx, %xmm15
|
||||
; AVX512-NEXT: vmovq %r13, %xmm0
|
||||
; AVX512-NEXT: vmovq %r15, %xmm1
|
||||
; AVX512-NEXT: vmovq %r11, %xmm2
|
||||
; AVX512-NEXT: vmovq %r8, %xmm3
|
||||
; AVX512-NEXT: vmovq %rsi, %xmm13
|
||||
; AVX512-NEXT: vmovq %r13, %xmm14
|
||||
; AVX512-NEXT: vmovq %r15, %xmm15
|
||||
; AVX512-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
|
||||
; AVX512-NEXT: # xmm0 = mem[0],zero
|
||||
; AVX512-NEXT: vmovq %r12, %xmm1
|
||||
; AVX512-NEXT: vmovq %rbx, %xmm2
|
||||
; AVX512-NEXT: vmovq %r11, %xmm3
|
||||
; AVX512-NEXT: vmovq %r10, %xmm4
|
||||
; AVX512-NEXT: vmovq %rdi, %xmm5
|
||||
; AVX512-NEXT: vmovq %rsi, %xmm6
|
||||
; AVX512-NEXT: vmovq %r8, %xmm5
|
||||
; AVX512-NEXT: vmovq %rdi, %xmm6
|
||||
; AVX512-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 8-byte Folded Reload
|
||||
; AVX512-NEXT: # xmm7 = mem[0],zero
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm9[0],xmm8[0]
|
||||
|
@ -2860,7 +2869,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
|
|||
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
|
||||
; AVX512-NEXT: vpmovdb %zmm0, (%rax)
|
||||
; AVX512-NEXT: addq $16, %rsp
|
||||
; AVX512-NEXT: addq $24, %rsp
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: popq %r12
|
||||
; AVX512-NEXT: popq %r13
|
||||
|
|
|
@@ -77,11 +77,11 @@ define i64 @addressModeWith32bitIndex(i32 %V) {
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: cqto
; CHECK-NEXT: movslq %edi, %rcx
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
; CHECK-NEXT: idivq (%rsi,%rcx,8)
; CHECK-NEXT: movslq %edi, %rsi
; CHECK-NEXT: idivq (%rcx,%rsi,8)
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%gep = getelementptr i64, i64* null, i32 %V
@@ -2,7 +2,9 @@

; Check no spills to the same stack slot after hoisting.
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -133,26 +133,25 @@ return:
define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp {
; CHECK-LABEL: bsd_memchr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: je .LBB3_5
; CHECK-NEXT: je .LBB3_4
; CHECK-NEXT: # %bb.1: # %preheader
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB3_2: # %do.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpl %edx, %esi
; CHECK-NEXT: je .LBB3_3
; CHECK-NEXT: # %bb.4: # %do.cond
; CHECK-NEXT: je .LBB3_5
; CHECK-NEXT: # %bb.3: # %do.cond
; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: incq %rax
; CHECK-NEXT: decq %rcx
; CHECK-NEXT: jne .LBB3_2
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .LBB3_5: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
entry:
%cmp = icmp eq i64 %n, 0
br i1 %cmp, label %return, label %preheader
@@ -356,7 +356,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB3_1: # %vector.body
@@ -365,18 +365,18 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
; SSE2-NEXT: movdqu 16(%rdi,%rcx,2), %xmm6
; SSE2-NEXT: movdqu 32(%rdi,%rcx,2), %xmm7
; SSE2-NEXT: movdqu 48(%rdi,%rcx,2), %xmm9
; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm5, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm2
; SSE2-NEXT: movdqu 16(%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm6, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm4
; SSE2-NEXT: movdqu 32(%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm7, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: movdqu 48(%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm9, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm3
; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm5, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm2
; SSE2-NEXT: movdqu 16(%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm6, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm4
; SSE2-NEXT: movdqu 32(%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm7, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movdqu 48(%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm9, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: addq $16, %rcx
; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB3_1
@@ -385,14 +385,14 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
; SSE2-NEXT: paddd %xmm8, %xmm3
; SSE2-NEXT: paddd %xmm4, %xmm3
; SSE2-NEXT: paddd %xmm8, %xmm2
; SSE2-NEXT: paddd %xmm8, %xmm0
; SSE2-NEXT: paddd %xmm3, %xmm0
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: paddd %xmm8, %xmm1
; SSE2-NEXT: paddd %xmm3, %xmm1
; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: _Z10test_shortPsS_i_1024:
@@ -949,7 +949,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm9, %xmm9
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB7_1: # %vector.body
@ -963,9 +963,9 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
|||
; SSE2-NEXT: movq {{.*#+}} xmm7 = mem[0],zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: psraw $8, %xmm7
|
||||
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: psraw $8, %xmm1
|
||||
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: psraw $8, %xmm0
|
||||
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: psraw $8, %xmm2
|
||||
|
@ -980,11 +980,11 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
|||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: psraw $8, %xmm2
|
||||
; SSE2-NEXT: pmaddwd %xmm7, %xmm2
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm0
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: psraw $8, %xmm2
|
||||
; SSE2-NEXT: pmaddwd %xmm1, %xmm2
|
||||
; SSE2-NEXT: pmaddwd %xmm0, %xmm2
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm3
|
||||
; SSE2-NEXT: addq $32, %rcx
|
||||
; SSE2-NEXT: cmpq %rcx, %rax
|
||||
|
@ -994,14 +994,14 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
|||
; SSE2-NEXT: paddd %xmm8, %xmm3
|
||||
; SSE2-NEXT: paddd %xmm4, %xmm3
|
||||
; SSE2-NEXT: paddd %xmm8, %xmm9
|
||||
; SSE2-NEXT: paddd %xmm8, %xmm0
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm0
|
||||
; SSE2-NEXT: paddd %xmm9, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm8, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm9, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movd %xmm0, %eax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: _Z9test_charPcS_i_1024:
|
||||
|
|
|
@ -604,13 +604,12 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
;
; X64-LABEL: test3:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %r8d, %r8d
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testl %edx, %edx
; X64-NEXT: je .LBB3_3
; X64-NEXT: # %bb.1: # %bb26.preheader
; X64-NEXT: xorl %r8d, %r8d
; X64-NEXT: je .LBB3_2
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_2: # %bb26
; X64-NEXT: .LBB3_1: # %bb26
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movslq %r8d, %r8
; X64-NEXT: movq (%rdi,%r8,8), %rcx
@ -618,8 +617,8 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
; X64-NEXT: addq %rcx, %rax
; X64-NEXT: incl %r8d
; X64-NEXT: cmpl %edx, %r8d
; X64-NEXT: jb .LBB3_2
; X64-NEXT: .LBB3_3: # %bb31
; X64-NEXT: jb .LBB3_1
; X64-NEXT: .LBB3_2: # %bb31
; X64-NEXT: retq
entry:
%tmp2942 = icmp eq i32 %count, 0

@ -10,12 +10,13 @@ define void @foo() {
; X86-O0-LABEL: foo:
; X86-O0: # %bb.0: # %entry
; X86-O0-NEXT: xorl %eax, %eax
; X86-O0-NEXT: xorl %ecx, %ecx
; X86-O0-NEXT: movl %eax, %ecx
; X86-O0-NEXT: xorl %eax, %eax
; X86-O0-NEXT: movzbl c, %edx
; X86-O0-NEXT: subl %edx, %ecx
; X86-O0-NEXT: movslq %ecx, %rsi
; X86-O0-NEXT: subq %rsi, %rax
; X86-O0-NEXT: movb %al, %dil
; X86-O0-NEXT: subl %edx, %eax
; X86-O0-NEXT: movslq %eax, %rsi
; X86-O0-NEXT: subq %rsi, %rcx
; X86-O0-NEXT: movb %cl, %dil
; X86-O0-NEXT: cmpb $0, %dil
; X86-O0-NEXT: setne %dil
; X86-O0-NEXT: andb $1, %dil
@ -25,13 +26,13 @@ define void @foo() {
; X86-O0-NEXT: xorb $-1, %dil
; X86-O0-NEXT: xorb $-1, %dil
; X86-O0-NEXT: andb $1, %dil
; X86-O0-NEXT: movzbl %dil, %ecx
; X86-O0-NEXT: movzbl %dil, %eax
; X86-O0-NEXT: movzbl c, %edx
; X86-O0-NEXT: cmpl %edx, %ecx
; X86-O0-NEXT: cmpl %edx, %eax
; X86-O0-NEXT: setle %dil
; X86-O0-NEXT: andb $1, %dil
; X86-O0-NEXT: movzbl %dil, %ecx
; X86-O0-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: movzbl %dil, %eax
; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: retq
;
; X64-LABEL: foo:

@ -14,21 +14,22 @@ define void @foo() {
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movw $0, var_825
; X64-NEXT: movzwl var_32, %ecx
; X64-NEXT: movzwl var_32, %eax
; X64-NEXT: movzwl var_901, %edx
; X64-NEXT: movl %ecx, %esi
; X64-NEXT: movl %eax, %esi
; X64-NEXT: xorl %edx, %esi
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: movl %eax, %edx
; X64-NEXT: xorl %esi, %edx
; X64-NEXT: addl %ecx, %edx
; X64-NEXT: addl %eax, %edx
; X64-NEXT: movslq %edx, %rdi
; X64-NEXT: movq %rdi, var_826
; X64-NEXT: movzwl var_32, %ecx
; X64-NEXT: movl %ecx, %edi
; X64-NEXT: movzwl var_901, %ecx
; X64-NEXT: xorl $51981, %ecx # imm = 0xCB0D
; X64-NEXT: movslq %ecx, %r8
; X64-NEXT: movzwl var_32, %eax
; X64-NEXT: movl %eax, %edi
; X64-NEXT: movzwl var_901, %eax
; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D
; X64-NEXT: movslq %eax, %r8
; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440
; X64-NEXT: xorq %r9, %r8
; X64-NEXT: movq %rdi, %r9
@ -40,11 +41,11 @@ define void @foo() {
; X64-NEXT: orq %r8, %rdi
; X64-NEXT: movw %di, %r10w
; X64-NEXT: movw %r10w, var_900
; X64-NEXT: cmpq var_28, %rax
; X64-NEXT: cmpq var_28, %rcx
; X64-NEXT: setne %r11b
; X64-NEXT: andb $1, %r11b
; X64-NEXT: movzbl %r11b, %ecx
; X64-NEXT: movw %cx, %r10w
; X64-NEXT: movzbl %r11b, %eax
; X64-NEXT: movw %ax, %r10w
; X64-NEXT: movw %r10w, var_827
; X64-NEXT: retq
entry:

@ -20,18 +20,18 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rcx, %r9
; ILP-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; ILP-NEXT: xorl %eax, %eax
; ILP-NEXT: addq $1, %rsi
; ILP-NEXT: adcq $0, %rdx
; ILP-NEXT: adcq $0, %r9
; ILP-NEXT: adcq $0, %r8
; ILP-NEXT: leal 1(%rsi,%rsi), %edi
; ILP-NEXT: movl $1, %ebp
; ILP-NEXT: xorl %eax, %eax
; ILP-NEXT: xorl %r11d, %r11d
; ILP-NEXT: xorl %r14d, %r14d
; ILP-NEXT: movl %edi, %ecx
; ILP-NEXT: shldq %cl, %rbp, %r11
; ILP-NEXT: movl $1, %r14d
; ILP-NEXT: shlq %cl, %r14
; ILP-NEXT: shldq %cl, %rbp, %r14
; ILP-NEXT: movl $1, %r11d
; ILP-NEXT: shlq %cl, %r11
; ILP-NEXT: movb $-128, %r10b
; ILP-NEXT: subb %dil, %r10b
; ILP-NEXT: movq %r9, %r13
@ -42,33 +42,33 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: xorl %r15d, %r15d
; ILP-NEXT: movl %edi, %ecx
; ILP-NEXT: shldq %cl, %r15, %r15
; ILP-NEXT: movq %rsi, %rbp
; ILP-NEXT: shrdq %cl, %rdx, %rbp
; ILP-NEXT: movq %rsi, %rbx
; ILP-NEXT: shrdq %cl, %rdx, %rbx
; ILP-NEXT: shrq %cl, %rdx
; ILP-NEXT: addb $-128, %cl
; ILP-NEXT: shrdq %cl, %r8, %r9
; ILP-NEXT: testb $64, %dil
; ILP-NEXT: cmovneq %r14, %r11
; ILP-NEXT: cmoveq %rbp, %rdx
; ILP-NEXT: cmovneq %r11, %r14
; ILP-NEXT: cmoveq %rbx, %rdx
; ILP-NEXT: cmovneq %rax, %r15
; ILP-NEXT: cmovneq %rax, %r14
; ILP-NEXT: cmovneq %rax, %r11
; ILP-NEXT: testb $64, %r10b
; ILP-NEXT: cmovneq %rax, %r12
; ILP-NEXT: cmovneq %rax, %r13
; ILP-NEXT: movl $1, %ebp
; ILP-NEXT: shlq %cl, %rbp
; ILP-NEXT: movl $1, %ebx
; ILP-NEXT: shlq %cl, %rbx
; ILP-NEXT: orl %edx, %r13d
; ILP-NEXT: xorl %edx, %edx
; ILP-NEXT: movl $1, %ebx
; ILP-NEXT: shldq %cl, %rbx, %rdx
; ILP-NEXT: movl $1, %ebp
; ILP-NEXT: shldq %cl, %rbp, %rdx
; ILP-NEXT: shrq %cl, %r8
; ILP-NEXT: testb $64, %cl
; ILP-NEXT: cmoveq %r9, %r8
; ILP-NEXT: cmovneq %rbp, %rdx
; ILP-NEXT: cmovneq %rax, %rbp
; ILP-NEXT: cmovneq %rbx, %rdx
; ILP-NEXT: cmovneq %rax, %rbx
; ILP-NEXT: testb %dil, %dil
; ILP-NEXT: cmovsq %rax, %r11
; ILP-NEXT: cmovsq %rax, %r14
; ILP-NEXT: cmovsq %rax, %r11
; ILP-NEXT: jns .LBB0_2
; ILP-NEXT: # %bb.1:
; ILP-NEXT: movl %r8d, %r13d
@ -77,20 +77,20 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: # %bb.3:
; ILP-NEXT: movl %r13d, %esi
; ILP-NEXT: .LBB0_4:
; ILP-NEXT: cmovnsq %r12, %rbp
; ILP-NEXT: cmoveq %rax, %rbp
; ILP-NEXT: cmovnsq %r12, %rbx
; ILP-NEXT: cmoveq %rax, %rbx
; ILP-NEXT: cmovnsq %r15, %rdx
; ILP-NEXT: cmoveq %rax, %rdx
; ILP-NEXT: testb $1, %sil
; ILP-NEXT: cmovneq %rax, %rdx
; ILP-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; ILP-NEXT: movq %rdx, 24(%rax)
; ILP-NEXT: cmovneq %rax, %rbp
; ILP-NEXT: movq %rbp, 16(%rax)
; ILP-NEXT: cmovneq %rax, %r11
; ILP-NEXT: movq %r11, 8(%rax)
; ILP-NEXT: cmovneq %rax, %rbx
; ILP-NEXT: movq %rbx, 16(%rax)
; ILP-NEXT: cmovneq %rax, %r14
; ILP-NEXT: movq %r14, (%rax)
; ILP-NEXT: movq %r14, 8(%rax)
; ILP-NEXT: cmovneq %rax, %r11
; ILP-NEXT: movq %r11, (%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: popq %r12
; ILP-NEXT: popq %r13
@ -101,6 +101,7 @@ define i256 @test1(i256 %a) nounwind {
;
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
; HYBRID-NEXT: pushq %rbp
; HYBRID-NEXT: pushq %r15
; HYBRID-NEXT: pushq %r14
; HYBRID-NEXT: pushq %r13
@ -112,82 +113,84 @@ define i256 @test1(i256 %a) nounwind {
; HYBRID-NEXT: adcq $0, %rdx
; HYBRID-NEXT: adcq $0, %r9
; HYBRID-NEXT: adcq $0, %r8
; HYBRID-NEXT: xorl %r10d, %r10d
; HYBRID-NEXT: leal 1(%rsi,%rsi), %edi
; HYBRID-NEXT: xorl %r14d, %r14d
; HYBRID-NEXT: xorl %r15d, %r15d
; HYBRID-NEXT: movl %edi, %ecx
; HYBRID-NEXT: shldq %cl, %r15, %r15
; HYBRID-NEXT: shldq %cl, %r14, %r14
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: cmovneq %r14, %r15
; HYBRID-NEXT: movl $1, %r11d
; HYBRID-NEXT: cmovneq %r10, %r14
; HYBRID-NEXT: movl $1, %ebp
; HYBRID-NEXT: movl $1, %r12d
; HYBRID-NEXT: shlq %cl, %r12
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: movq %r12, %r10
; HYBRID-NEXT: cmovneq %r14, %r10
; HYBRID-NEXT: movq %r12, %r11
; HYBRID-NEXT: cmovneq %r10, %r11
; HYBRID-NEXT: movq %rsi, %rbx
; HYBRID-NEXT: shrdq %cl, %rdx, %rbx
; HYBRID-NEXT: shrq %cl, %rdx
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: cmoveq %rbx, %rdx
; HYBRID-NEXT: xorl %r13d, %r13d
; HYBRID-NEXT: shldq %cl, %r11, %r13
; HYBRID-NEXT: xorl %r15d, %r15d
; HYBRID-NEXT: shldq %cl, %rbp, %r15
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: cmovneq %r12, %r13
; HYBRID-NEXT: cmovneq %r12, %r15
; HYBRID-NEXT: movb $-128, %cl
; HYBRID-NEXT: subb %dil, %cl
; HYBRID-NEXT: movq %r9, %rbx
; HYBRID-NEXT: shlq %cl, %rbx
; HYBRID-NEXT: movq %r9, %r13
; HYBRID-NEXT: shlq %cl, %r13
; HYBRID-NEXT: movl $1, %r12d
; HYBRID-NEXT: shrdq %cl, %r14, %r12
; HYBRID-NEXT: shrdq %cl, %r10, %r12
; HYBRID-NEXT: testb $64, %cl
; HYBRID-NEXT: cmovneq %r14, %r12
; HYBRID-NEXT: cmovneq %r14, %rbx
; HYBRID-NEXT: orl %edx, %ebx
; HYBRID-NEXT: cmovneq %r10, %r12
; HYBRID-NEXT: cmovneq %r10, %r13
; HYBRID-NEXT: orl %edx, %r13d
; HYBRID-NEXT: movl %edi, %ecx
; HYBRID-NEXT: addb $-128, %cl
; HYBRID-NEXT: shrdq %cl, %r8, %r9
; HYBRID-NEXT: shrq %cl, %r8
; HYBRID-NEXT: xorl %edx, %edx
; HYBRID-NEXT: shldq %cl, %r11, %rdx
; HYBRID-NEXT: shlq %cl, %r11
; HYBRID-NEXT: shldq %cl, %rbp, %rdx
; HYBRID-NEXT: shlq %cl, %rbp
; HYBRID-NEXT: testb $64, %cl
; HYBRID-NEXT: cmovneq %r11, %rdx
; HYBRID-NEXT: cmovneq %rbp, %rdx
; HYBRID-NEXT: cmoveq %r9, %r8
; HYBRID-NEXT: cmovneq %r14, %r11
; HYBRID-NEXT: cmovneq %r10, %rbp
; HYBRID-NEXT: testb %dil, %dil
; HYBRID-NEXT: jns .LBB0_2
; HYBRID-NEXT: # %bb.1:
; HYBRID-NEXT: movl %r8d, %ebx
; HYBRID-NEXT: movl %r8d, %r13d
; HYBRID-NEXT: .LBB0_2:
; HYBRID-NEXT: je .LBB0_4
; HYBRID-NEXT: # %bb.3:
; HYBRID-NEXT: movl %ebx, %esi
; HYBRID-NEXT: movl %r13d, %esi
; HYBRID-NEXT: .LBB0_4:
; HYBRID-NEXT: cmovsq %r14, %r13
; HYBRID-NEXT: cmovnsq %r12, %r11
; HYBRID-NEXT: cmoveq %r14, %r11
; HYBRID-NEXT: cmovnsq %r15, %rdx
; HYBRID-NEXT: cmoveq %r14, %rdx
; HYBRID-NEXT: cmovsq %r14, %r10
; HYBRID-NEXT: cmovsq %r10, %r15
; HYBRID-NEXT: cmovnsq %r12, %rbp
; HYBRID-NEXT: cmoveq %r10, %rbp
; HYBRID-NEXT: cmovnsq %r14, %rdx
; HYBRID-NEXT: cmoveq %r10, %rdx
; HYBRID-NEXT: cmovsq %r10, %r11
; HYBRID-NEXT: testb $1, %sil
; HYBRID-NEXT: cmovneq %rax, %rdx
; HYBRID-NEXT: movq %rdx, 24(%rax)
; HYBRID-NEXT: cmovneq %rax, %rbp
; HYBRID-NEXT: movq %rbp, 16(%rax)
; HYBRID-NEXT: cmovneq %rax, %r15
; HYBRID-NEXT: movq %r15, 8(%rax)
; HYBRID-NEXT: cmovneq %rax, %r11
; HYBRID-NEXT: movq %r11, 16(%rax)
; HYBRID-NEXT: cmovneq %rax, %r13
; HYBRID-NEXT: movq %r13, 8(%rax)
; HYBRID-NEXT: cmovneq %rax, %r10
; HYBRID-NEXT: movq %r10, (%rax)
; HYBRID-NEXT: movq %r11, (%rax)
; HYBRID-NEXT: popq %rbx
; HYBRID-NEXT: popq %r12
; HYBRID-NEXT: popq %r13
; HYBRID-NEXT: popq %r14
; HYBRID-NEXT: popq %r15
; HYBRID-NEXT: popq %rbp
; HYBRID-NEXT: retq
;
; BURR-LABEL: test1:
; BURR: # %bb.0:
; BURR-NEXT: pushq %rbp
; BURR-NEXT: pushq %r15
; BURR-NEXT: pushq %r14
; BURR-NEXT: pushq %r13
@ -199,78 +202,79 @@ define i256 @test1(i256 %a) nounwind {
; BURR-NEXT: adcq $0, %rdx
; BURR-NEXT: adcq $0, %r9
; BURR-NEXT: adcq $0, %r8
; BURR-NEXT: xorl %r10d, %r10d
; BURR-NEXT: leal 1(%rsi,%rsi), %edi
; BURR-NEXT: xorl %r14d, %r14d
; BURR-NEXT: xorl %r15d, %r15d
; BURR-NEXT: movl %edi, %ecx
; BURR-NEXT: shldq %cl, %r15, %r15
; BURR-NEXT: shldq %cl, %r14, %r14
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: cmovneq %r14, %r15
; BURR-NEXT: movl $1, %r11d
; BURR-NEXT: cmovneq %r10, %r14
; BURR-NEXT: movl $1, %ebp
; BURR-NEXT: movl $1, %r12d
; BURR-NEXT: shlq %cl, %r12
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: movq %r12, %r10
; BURR-NEXT: cmovneq %r14, %r10
; BURR-NEXT: movq %r12, %r11
; BURR-NEXT: cmovneq %r10, %r11
; BURR-NEXT: movq %rsi, %rbx
; BURR-NEXT: shrdq %cl, %rdx, %rbx
; BURR-NEXT: shrq %cl, %rdx
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: cmoveq %rbx, %rdx
; BURR-NEXT: xorl %r13d, %r13d
; BURR-NEXT: shldq %cl, %r11, %r13
; BURR-NEXT: xorl %r15d, %r15d
; BURR-NEXT: shldq %cl, %rbp, %r15
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: cmovneq %r12, %r13
; BURR-NEXT: cmovneq %r12, %r15
; BURR-NEXT: movb $-128, %cl
; BURR-NEXT: subb %dil, %cl
; BURR-NEXT: movq %r9, %rbx
; BURR-NEXT: shlq %cl, %rbx
; BURR-NEXT: movq %r9, %r13
; BURR-NEXT: shlq %cl, %r13
; BURR-NEXT: movl $1, %r12d
; BURR-NEXT: shrdq %cl, %r14, %r12
; BURR-NEXT: shrdq %cl, %r10, %r12
; BURR-NEXT: testb $64, %cl
; BURR-NEXT: cmovneq %r14, %r12
; BURR-NEXT: cmovneq %r14, %rbx
; BURR-NEXT: orl %edx, %ebx
; BURR-NEXT: cmovneq %r10, %r12
; BURR-NEXT: cmovneq %r10, %r13
; BURR-NEXT: orl %edx, %r13d
; BURR-NEXT: movl %edi, %ecx
; BURR-NEXT: addb $-128, %cl
; BURR-NEXT: shrdq %cl, %r8, %r9
; BURR-NEXT: xorl %edx, %edx
; BURR-NEXT: shldq %cl, %r11, %rdx
; BURR-NEXT: shldq %cl, %rbp, %rdx
; BURR-NEXT: shrq %cl, %r8
; BURR-NEXT: shlq %cl, %r11
; BURR-NEXT: shlq %cl, %rbp
; BURR-NEXT: testb $64, %cl
; BURR-NEXT: cmovneq %r11, %rdx
; BURR-NEXT: cmovneq %rbp, %rdx
; BURR-NEXT: cmoveq %r9, %r8
; BURR-NEXT: cmovneq %r14, %r11
; BURR-NEXT: cmovneq %r10, %rbp
; BURR-NEXT: testb %dil, %dil
; BURR-NEXT: jns .LBB0_2
; BURR-NEXT: # %bb.1:
; BURR-NEXT: movl %r8d, %ebx
; BURR-NEXT: movl %r8d, %r13d
; BURR-NEXT: .LBB0_2:
; BURR-NEXT: je .LBB0_4
; BURR-NEXT: # %bb.3:
; BURR-NEXT: movl %ebx, %esi
; BURR-NEXT: movl %r13d, %esi
; BURR-NEXT: .LBB0_4:
; BURR-NEXT: cmovsq %r14, %r13
; BURR-NEXT: cmovnsq %r12, %r11
; BURR-NEXT: cmoveq %r14, %r11
; BURR-NEXT: cmovnsq %r15, %rdx
; BURR-NEXT: cmoveq %r14, %rdx
; BURR-NEXT: cmovsq %r14, %r10
; BURR-NEXT: cmovsq %r10, %r15
; BURR-NEXT: cmovnsq %r12, %rbp
; BURR-NEXT: cmoveq %r10, %rbp
; BURR-NEXT: cmovnsq %r14, %rdx
; BURR-NEXT: cmoveq %r10, %rdx
; BURR-NEXT: cmovsq %r10, %r11
; BURR-NEXT: testb $1, %sil
; BURR-NEXT: cmovneq %rax, %rdx
; BURR-NEXT: movq %rdx, 24(%rax)
; BURR-NEXT: cmovneq %rax, %rbp
; BURR-NEXT: movq %rbp, 16(%rax)
; BURR-NEXT: cmovneq %rax, %r15
; BURR-NEXT: movq %r15, 8(%rax)
; BURR-NEXT: cmovneq %rax, %r11
; BURR-NEXT: movq %r11, 16(%rax)
; BURR-NEXT: cmovneq %rax, %r13
; BURR-NEXT: movq %r13, 8(%rax)
; BURR-NEXT: cmovneq %rax, %r10
; BURR-NEXT: movq %r10, (%rax)
; BURR-NEXT: movq %r11, (%rax)
; BURR-NEXT: popq %rbx
; BURR-NEXT: popq %r12
; BURR-NEXT: popq %r13
; BURR-NEXT: popq %r14
; BURR-NEXT: popq %r15
; BURR-NEXT: popq %rbp
; BURR-NEXT: retq
;
; SRC-LABEL: test1:
@ -297,8 +301,8 @@ define i256 @test1(i256 %a) nounwind {
; SRC-NEXT: movl %r11d, %ecx
; SRC-NEXT: shrdq %cl, %rdx, %rbp
; SRC-NEXT: shrq %cl, %rdx
; SRC-NEXT: movl $1, %edi
; SRC-NEXT: xorl %r15d, %r15d
; SRC-NEXT: movl $1, %edi
; SRC-NEXT: xorl %r14d, %r14d
; SRC-NEXT: shldq %cl, %rdi, %r14
; SRC-NEXT: xorl %r13d, %r13d
@ -906,15 +910,15 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; ILP-LABEL: test4:
; ILP: # %bb.0:
; ILP-NEXT: xorl %ecx, %ecx
; ILP-NEXT: addq $1, %rsi
; ILP-NEXT: setb %cl
; ILP-NEXT: movl $2, %eax
; ILP-NEXT: xorl %edx, %edx
; ILP-NEXT: addq $1, %rsi
; ILP-NEXT: setb %dl
; ILP-NEXT: movl $2, %eax
; ILP-NEXT: cmpq %rdi, %rsi
; ILP-NEXT: sbbq $0, %rcx
; ILP-NEXT: movl $0, %ecx
; ILP-NEXT: sbbq $0, %rcx
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: movl $0, %edx
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: sbbq $0, %rcx
; ILP-NEXT: setae %cl
; ILP-NEXT: movzbl %cl, %ecx
; ILP-NEXT: subq %rcx, %rax
@ -923,14 +927,14 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; HYBRID-LABEL: test4:
; HYBRID: # %bb.0:
; HYBRID-NEXT: xorl %eax, %eax
; HYBRID-NEXT: addq $1, %rsi
; HYBRID-NEXT: setb %al
; HYBRID-NEXT: xorl %ecx, %ecx
; HYBRID-NEXT: addq $1, %rsi
; HYBRID-NEXT: setb %cl
; HYBRID-NEXT: cmpq %rdi, %rsi
; HYBRID-NEXT: sbbq $0, %rax
; HYBRID-NEXT: movl $0, %eax
; HYBRID-NEXT: sbbq $0, %rax
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: movl $0, %ecx
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: sbbq $0, %rax
; HYBRID-NEXT: setae %al
; HYBRID-NEXT: movzbl %al, %ecx
; HYBRID-NEXT: movl $2, %eax
@ -940,14 +944,14 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; BURR-LABEL: test4:
; BURR: # %bb.0:
; BURR-NEXT: xorl %eax, %eax
; BURR-NEXT: addq $1, %rsi
; BURR-NEXT: setb %al
; BURR-NEXT: xorl %ecx, %ecx
; BURR-NEXT: addq $1, %rsi
; BURR-NEXT: setb %cl
; BURR-NEXT: cmpq %rdi, %rsi
; BURR-NEXT: sbbq $0, %rax
; BURR-NEXT: movl $0, %eax
; BURR-NEXT: sbbq $0, %rax
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: movl $0, %ecx
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: sbbq $0, %rax
; BURR-NEXT: setae %al
; BURR-NEXT: movzbl %al, %ecx
; BURR-NEXT: movl $2, %eax

@ -1,75 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; This test checks that we use "movq $0, (%rsp)" to spill a 0 to the stack. It
; was reduced from a larger function.

; CHECK: movq $0, (%rsp) # 8-byte Folded Spill

%struct.foo = type { i8*, i32 }

declare void @pluto()

define void @spam() {
bb:
br label %bb13

bb1: ; preds = %bb18
call void @pluto()
%tmp = getelementptr inbounds %struct.foo, %struct.foo* %tmp20, i64 0, i32 1
%tmp2 = bitcast i32* %tmp to %struct.foo**
store %struct.foo* null, %struct.foo** %tmp2
unreachable

bb3: ; preds = %bb18
call void @pluto()
store i8* %tmp22, i8** undef
unreachable

bb4: ; preds = %bb18
call void @pluto()
br label %bb13

bb5: ; preds = %bb18
%tmp7 = add nsw i32 %tmp23, 1
store i8* %tmp22, i8** undef
unreachable

bb8: ; preds = %bb18
store %struct.foo* %tmp14, %struct.foo** undef
unreachable

bb9: ; preds = %bb18
%tmp10 = load %struct.foo*, %struct.foo** undef
br label %bb13

bb13: ; preds = %bb18, %bb9, %bb4, %bb
%tmp14 = phi %struct.foo* [ %tmp14, %bb18 ], [ %tmp14, %bb4 ], [ null, %bb ], [ %tmp10, %bb9 ]
%tmp15 = phi %struct.foo* [ %tmp26, %bb18 ], [ %tmp26, %bb4 ], [ null, %bb ], [ %tmp26, %bb9 ]
%tmp16 = phi i32 [ %tmp23, %bb18 ], [ %tmp23, %bb4 ], [ 0, %bb ], [ %tmp23, %bb9 ]
br label %bb17

bb17: ; preds = %bb13
br i1 false, label %bb27, label %bb18

bb18: ; preds = %bb17
%tmp19 = load %struct.foo*, %struct.foo** undef
%tmp20 = getelementptr inbounds %struct.foo, %struct.foo* %tmp19, i64 0
%tmp21 = getelementptr inbounds %struct.foo, %struct.foo* %tmp20, i64 0, i32 0
%tmp22 = load i8*, i8** %tmp21
%tmp23 = add nsw i32 %tmp16, -1
%tmp24 = getelementptr inbounds %struct.foo, %struct.foo* %tmp15, i64 0, i32 1
%tmp25 = bitcast i32* %tmp24 to %struct.foo**
%tmp26 = load %struct.foo*, %struct.foo** %tmp25
switch i32 undef, label %bb9 [
i32 1, label %bb1
i32 2, label %bb3
i32 3, label %bb4
i32 4, label %bb5
i32 5, label %bb13
i32 6, label %bb8
]

bb27: ; preds = %bb17
ret void
}

@ -41,7 +41,8 @@ define float @caller(i8* %error_ref) {
; CHECK-APPLE: callq {{.*}}free

; CHECK-O0-LABEL: caller:
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: jne
entry:
@ -77,7 +78,8 @@ define float @caller2(i8* %error_ref) {
; CHECK-APPLE: callq {{.*}}free

; CHECK-O0-LABEL: caller2:
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: movq %r12, [[ID:%[a-z]+]]
; CHECK-O0: cmpq $0, %r12
@ -252,7 +254,8 @@ define float @caller3(i8* %error_ref) {
; CHECK-APPLE: callq {{.*}}free

; CHECK-O0-LABEL: caller3:
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: xorl
; CHECK-O0: movl {{.*}}, %r12d
; CHECK-O0: movl $1, %esi
; CHECK-O0: movq {{.*}}, %rdi
; CHECK-O0: callq {{.*}}foo_sret
@ -310,12 +313,14 @@ define float @caller_with_multiple_swifterror_values(i8* %error_ref, i8* %error_
; CHECK-O0-LABEL: caller_with_multiple_swifterror_values:

; The first swifterror value:
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: jne

; The second swifterror value:
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: jne
entry:
@ -710,7 +715,8 @@ declare swiftcc void @foo2(%swift_error** swifterror)
; Make sure we properly assign registers during fast-isel.
; CHECK-O0-LABEL: testAssign
; CHECK-O0: pushq %r12
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: xorl [[ZERO:%[a-z0-9]+]], [[ZERO]]
; CHECK-O0: movl [[ZERO]], %r12d
; CHECK-O0: callq _foo2
; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]]
;
@ -786,7 +792,8 @@ a:

; CHECK-O0-LABEL: testAssign4
; CHECK-O0: callq _foo2
; CHECK-O0: xorl %eax, %eax
; CHECK-O0: xorl %ecx, %ecx
; CHECK-O0: movl %ecx, %eax
; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]]
; CHECK-O0: movq [[SLOT]], %rax
; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]]