forked from OSchip/llvm-project
Revert "[X86FixupLEAs] Transform the sequence LEA/SUB to SUB/SUB"
This reverts commit 1b748faf2b because it breaks building the
llvm-test-suite with -verify-machineinstrs on X86:
http://green.lab.llvm.org/green/job/test-suite-verify-machineinstrs-x86_64-O3/9585/
Running llc -verify-machineinstrs on X86 crashes on the IR below:
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
%struct.widget = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, %struct.baz*, %struct.wobble.1*, i32, i32, i32, i32, i32, i32, %struct.quux.2*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i64, i64, %struct.zot.3, %struct.zot.3, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.baz = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.snork*, %struct.wombat.0*, %struct.wobble*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.widget*, %struct.eggs*)*, i32, i32, i32, i32 }
%struct.snork = type { %struct.spam*, %struct.zot, i32 (%struct.wombat*, %struct.widget*, %struct.snork*)* }
%struct.spam = type { i32, i32, i32, i32, i8*, i32 }
%struct.zot = type { i32, i32, i32, i32, i32, i8*, i32* }
%struct.wombat = type { i32, i32, i32, i32, i32, i32, i32, i32, void (i32, i32, i32*, i32*)*, void (%struct.wombat*, %struct.widget*, %struct.zot*)* }
%struct.wombat.0 = type { [4 x [11 x %struct.quux]], [2 x [9 x %struct.quux]], [2 x [10 x %struct.quux]], [2 x [6 x %struct.quux]], [4 x %struct.quux], [4 x %struct.quux], [3 x %struct.quux] }
%struct.quux = type { i16, i8 }
%struct.wobble = type { [2 x %struct.quux], [4 x %struct.quux], [3 x [4 x %struct.quux]], [10 x [4 x %struct.quux]], [10 x [15 x %struct.quux]], [10 x [15 x %struct.quux]], [10 x [5 x %struct.quux]], [10 x [5 x %struct.quux]], [10 x [15 x %struct.quux]], [10 x [15 x %struct.quux]] }
%struct.eggs = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.wobble.1 = type { i32, [2 x i32], i32, i32, %struct.wobble.1*, %struct.wobble.1*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.quux.2 = type { i32, i32, i32, i32, i32, %struct.quux.2* }
%struct.zot.3 = type { i64, i16, i16, i16 }
define void @blam(%struct.widget* %arg, i32 %arg1) local_unnamed_addr {
bb:
%tmp = load i32, i32* undef, align 4
%tmp2 = sdiv i32 %tmp, 6
%tmp3 = sdiv i32 undef, 6
%tmp4 = load i32, i32* undef, align 4
%tmp5 = icmp eq i32 %tmp4, 4
%tmp6 = select i1 %tmp5, i32 %tmp3, i32 %tmp2
%tmp7 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 0, i64 0
%tmp8 = zext i16 undef to i32
%tmp9 = zext i16 undef to i32
%tmp10 = load i16, i16* undef, align 2
%tmp11 = zext i16 %tmp10 to i32
%tmp12 = zext i16 undef to i32
%tmp13 = zext i16 undef to i32
%tmp14 = zext i16 undef to i32
%tmp15 = load i16, i16* undef, align 2
%tmp16 = zext i16 %tmp15 to i32
%tmp17 = zext i16 undef to i32
%tmp18 = sub nsw i32 %tmp8, %tmp9
%tmp19 = shl nsw i32 undef, 1
%tmp20 = add nsw i32 %tmp19, %tmp18
%tmp21 = sub nsw i32 %tmp11, %tmp12
%tmp22 = shl nsw i32 undef, 1
%tmp23 = add nsw i32 %tmp22, %tmp21
%tmp24 = sub nsw i32 %tmp13, %tmp14
%tmp25 = shl nsw i32 undef, 1
%tmp26 = add nsw i32 %tmp25, %tmp24
%tmp27 = sub nsw i32 %tmp16, %tmp17
%tmp28 = shl nsw i32 undef, 1
%tmp29 = add nsw i32 %tmp28, %tmp27
%tmp30 = sub nsw i32 %tmp20, %tmp29
%tmp31 = sub nsw i32 %tmp23, %tmp26
%tmp32 = shl nsw i32 %tmp30, 1
%tmp33 = add nsw i32 %tmp32, %tmp31
store i32 %tmp33, i32* undef, align 4
%tmp34 = mul nsw i32 %tmp31, -2
%tmp35 = add nsw i32 %tmp34, %tmp30
store i32 %tmp35, i32* undef, align 4
%tmp36 = select i1 %tmp5, i32 undef, i32 undef
br label %bb37
bb37: ; preds = %bb
%tmp38 = load i32, i32* undef, align 4
%tmp39 = ashr i32 %tmp38, %tmp6
%tmp40 = load i32, i32* undef, align 4
%tmp41 = sdiv i32 %tmp39, %tmp40
store i32 %tmp41, i32* undef, align 4
ret void
}
This commit is contained in:
parent
e087b4f149
commit
5cd66420cc
|
@ -459,13 +459,6 @@ public:
|
|||
unsigned &SrcOpIdx1,
|
||||
unsigned &SrcOpIdx2) const;
|
||||
|
||||
/// Returns true if the target has a preference on the operands order of
|
||||
/// the given machine instruction. And specify if \p Commute is required to
|
||||
/// get the desired operands order.
|
||||
virtual bool hasCommutePreference(MachineInstr &MI, bool &Commute) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// A pair composed of a register and a sub-register index.
|
||||
/// Used to give some type checking when modeling Reg:SubReg.
|
||||
struct RegSubRegPair {
|
||||
|
|
|
@ -527,11 +527,6 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
|
|||
if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge))
|
||||
return false;
|
||||
|
||||
// Look for other target specific commute preference.
|
||||
bool Commute;
|
||||
if (TII->hasCommutePreference(*MI, Commute))
|
||||
return Commute;
|
||||
|
||||
// Since there are no intervening uses for both registers, then commute
|
||||
// if the def of RegC is closer. Its live interval is shorter.
|
||||
return LastDefB && LastDefC && LastDefC > LastDefB;
|
||||
|
|
|
@ -79,27 +79,6 @@ class FixupLEAPass : public MachineFunctionPass {
|
|||
MachineBasicBlock &MBB, bool OptIncDec,
|
||||
bool UseLEAForSP) const;
|
||||
|
||||
/// Look for and transform the sequence
|
||||
/// lea (reg1, reg2), reg3
|
||||
/// sub reg3, reg4
|
||||
/// to
|
||||
/// sub reg1, reg4
|
||||
/// sub reg2, reg4
|
||||
/// It can also optimize the sequence lea/add similarly.
|
||||
bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
|
||||
|
||||
/// Step forwards in MBB, looking for an ADD/SUB instruction which uses
|
||||
/// the dest register of LEA instruction I.
|
||||
MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
|
||||
MachineBasicBlock &MBB) const;
|
||||
|
||||
/// Check instructions between LeaI and AluI (exclusively).
|
||||
/// Set BaseIndexDef to true if base or index register from LeaI is defined.
|
||||
/// Set AluDestRef to true if the dest register of AluI is used or defined.
|
||||
void checkRegUsage(MachineBasicBlock::iterator &LeaI,
|
||||
MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
|
||||
bool &AluDestRef) const;
|
||||
|
||||
/// Determine if an instruction references a machine register
|
||||
/// and, if so, whether it reads or writes the register.
|
||||
RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
|
||||
|
@ -359,18 +338,6 @@ static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
|
|||
}
|
||||
}
|
||||
|
||||
static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
|
||||
switch (LEAOpcode) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected LEA instruction");
|
||||
case X86::LEA32r:
|
||||
case X86::LEA64_32r:
|
||||
return X86::SUB32rr;
|
||||
case X86::LEA64r:
|
||||
return X86::SUB64rr;
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
|
||||
const MachineOperand &Offset) {
|
||||
bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
|
||||
|
@ -397,136 +364,6 @@ static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
|
|||
}
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
|
||||
MachineBasicBlock &MBB) const {
|
||||
const int InstrDistanceThreshold = 5;
|
||||
int InstrDistance = 1;
|
||||
MachineBasicBlock::iterator CurInst = std::next(I);
|
||||
|
||||
unsigned LEAOpcode = I->getOpcode();
|
||||
unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
|
||||
unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
|
||||
Register DestReg = I->getOperand(0).getReg();
|
||||
|
||||
while (CurInst != MBB.end()) {
|
||||
if (CurInst->isCall() || CurInst->isInlineAsm())
|
||||
break;
|
||||
if (InstrDistance > InstrDistanceThreshold)
|
||||
break;
|
||||
|
||||
// Check if the lea dest register is used in an add/sub instruction only.
|
||||
for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
|
||||
MachineOperand &Opnd = CurInst->getOperand(I);
|
||||
if (Opnd.isReg() && Opnd.getReg() == DestReg) {
|
||||
if (Opnd.isDef() || !Opnd.isKill())
|
||||
return MachineBasicBlock::iterator();
|
||||
|
||||
unsigned AluOpcode = CurInst->getOpcode();
|
||||
if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
|
||||
return MachineBasicBlock::iterator();
|
||||
|
||||
MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
|
||||
MachineOperand AluDest = CurInst->getOperand(0);
|
||||
if (Opnd2.getReg() != AluDest.getReg())
|
||||
return MachineBasicBlock::iterator();
|
||||
|
||||
// X - (Y + Z) may generate different flags than (X - Y) - Z when there
|
||||
// is overflow. So we can't change the alu instruction if the flags
|
||||
// register is live.
|
||||
if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
|
||||
return MachineBasicBlock::iterator();
|
||||
|
||||
return CurInst;
|
||||
}
|
||||
}
|
||||
|
||||
InstrDistance++;
|
||||
++CurInst;
|
||||
}
|
||||
return MachineBasicBlock::iterator();
|
||||
}
|
||||
|
||||
void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
|
||||
MachineBasicBlock::iterator &AluI,
|
||||
bool &BaseIndexDef, bool &AluDestRef) const {
|
||||
BaseIndexDef = AluDestRef = false;
|
||||
Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
|
||||
Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
|
||||
Register AluDestReg = AluI->getOperand(0).getReg();
|
||||
|
||||
MachineBasicBlock::iterator CurInst = std::next(LeaI);
|
||||
while (CurInst != AluI) {
|
||||
for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
|
||||
MachineOperand &Opnd = CurInst->getOperand(I);
|
||||
if (!Opnd.isReg())
|
||||
continue;
|
||||
Register Reg = Opnd.getReg();
|
||||
if (TRI->regsOverlap(Reg, AluDestReg))
|
||||
AluDestRef = true;
|
||||
if (Opnd.isDef() &&
|
||||
(TRI->regsOverlap(Reg, BaseReg) || TRI->regsOverlap(Reg, IndexReg))) {
|
||||
BaseIndexDef = true;
|
||||
}
|
||||
}
|
||||
++CurInst;
|
||||
}
|
||||
}
|
||||
|
||||
bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
|
||||
MachineBasicBlock &MBB) const {
|
||||
// Look for an add/sub instruction which uses the result of lea.
|
||||
MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
|
||||
if (AluI == MachineBasicBlock::iterator())
|
||||
return false;
|
||||
|
||||
// Check if there are any related register usage between lea and alu.
|
||||
bool BaseIndexDef, AluDestRef;
|
||||
checkRegUsage(I, AluI, BaseIndexDef, AluDestRef);
|
||||
|
||||
MachineBasicBlock::iterator InsertPos = AluI;
|
||||
if (BaseIndexDef) {
|
||||
if (AluDestRef)
|
||||
return false;
|
||||
InsertPos = I;
|
||||
}
|
||||
|
||||
// Check if there are same registers.
|
||||
Register AluDestReg = AluI->getOperand(0).getReg();
|
||||
Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
|
||||
Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
|
||||
if (I->getOpcode() == X86::LEA64_32r) {
|
||||
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
|
||||
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
|
||||
}
|
||||
if (AluDestReg == IndexReg) {
|
||||
if (BaseReg == IndexReg)
|
||||
return false;
|
||||
std::swap(BaseReg, IndexReg);
|
||||
}
|
||||
|
||||
// Now it's safe to change instructions.
|
||||
MachineInstr *NewMI1, *NewMI2;
|
||||
unsigned NewOpcode = AluI->getOpcode();
|
||||
NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
|
||||
AluDestReg)
|
||||
.addReg(AluDestReg)
|
||||
.addReg(BaseReg);
|
||||
NewMI1->addRegisterDead(X86::EFLAGS, TRI);
|
||||
NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
|
||||
AluDestReg)
|
||||
.addReg(AluDestReg)
|
||||
.addReg(IndexReg);
|
||||
NewMI2->addRegisterDead(X86::EFLAGS, TRI);
|
||||
|
||||
MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
|
||||
MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
|
||||
MBB.erase(I);
|
||||
MBB.erase(AluI);
|
||||
I = NewMI1;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
|
||||
MachineBasicBlock &MBB, bool OptIncDec,
|
||||
bool UseLEAForSP) const {
|
||||
|
@ -561,7 +398,6 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
|
|||
|
||||
MachineInstr *NewMI = nullptr;
|
||||
|
||||
// Case 1.
|
||||
// Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
|
||||
// which can be turned into add %reg2, %reg1
|
||||
if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
|
||||
|
@ -581,7 +417,6 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
|
|||
.addReg(BaseReg).addReg(IndexReg);
|
||||
}
|
||||
} else if (DestReg == BaseReg && IndexReg == 0) {
|
||||
// Case 2.
|
||||
// This is an LEA with only a base register and a displacement,
|
||||
// We can use ADDri or INC/DEC.
|
||||
|
||||
|
@ -612,12 +447,6 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
|
|||
.addReg(BaseReg).addImm(Disp.getImm());
|
||||
}
|
||||
}
|
||||
} else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
|
||||
// Case 3.
|
||||
// Look for and transform the sequence
|
||||
// lea (reg1, reg2), reg3
|
||||
// sub reg3, reg4
|
||||
return optLEAALU(I, MBB);
|
||||
} else
|
||||
return false;
|
||||
|
||||
|
|
|
@ -2670,58 +2670,6 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool isConvertibleLEA(MachineInstr *MI) {
|
||||
unsigned Opcode = MI->getOpcode();
|
||||
if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
|
||||
Opcode != X86::LEA64_32r)
|
||||
return false;
|
||||
|
||||
const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
|
||||
const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
|
||||
const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
|
||||
|
||||
if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
|
||||
Scale.getImm() > 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
|
||||
// Currently we're interested in following sequence only.
|
||||
// r3 = lea r1, r2
|
||||
// r5 = add r3, r4
|
||||
// Both r3 and r4 are killed in add, we hope the add instruction has the
|
||||
// operand order
|
||||
// r5 = add r4, r3
|
||||
// So later in X86FixupLEAs the lea instruction can be rewritten as add.
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
|
||||
return false;
|
||||
|
||||
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
|
||||
Register Reg1 = MI.getOperand(1).getReg();
|
||||
Register Reg2 = MI.getOperand(2).getReg();
|
||||
|
||||
// Check if Reg1 comes from LEA in the same MBB.
|
||||
if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) {
|
||||
if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
|
||||
Commute = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if Reg2 comes from LEA in the same MBB.
|
||||
if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) {
|
||||
if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
|
||||
Commute = false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
default: return X86::COND_INVALID;
|
||||
|
|
|
@ -284,10 +284,6 @@ public:
|
|||
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
|
||||
unsigned &SrcOpIdx2) const override;
|
||||
|
||||
/// Returns true if we have preference on the operands order in MI, the
|
||||
/// commute decision is returned in Commute.
|
||||
bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override;
|
||||
|
||||
/// Returns an adjusted FMA opcode that must be used in FMA instruction that
|
||||
/// performs the same computations as the given \p MI but which has the
|
||||
/// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
|
||||
|
|
|
@ -29,9 +29,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r15, %rbx
|
||||
; CHECK-NEXT: addq %rdx, %rbx
|
||||
; CHECK-NEXT: addq %rsi, %rbx
|
||||
; CHECK-NEXT: leaq (%r9,%r10), %rdx
|
||||
; CHECK-NEXT: addq %rdx, %rdx
|
||||
; CHECK-NEXT: addq %r8, %rdx
|
||||
; CHECK-NEXT: leaq (%r9,%r10), %rsi
|
||||
; CHECK-NEXT: leaq (%rsi,%r8), %rdx
|
||||
; CHECK-NEXT: addq %rsi, %rdx
|
||||
; CHECK-NEXT: movq X(%rip), %rdi
|
||||
; CHECK-NEXT: addq %rbx, %r12
|
||||
; CHECK-NEXT: addq %r8, %rdx
|
||||
|
@ -41,9 +41,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r12, %rsi
|
||||
; CHECK-NEXT: addq %r11, %rdi
|
||||
; CHECK-NEXT: addq %rsi, %rdi
|
||||
; CHECK-NEXT: leaq (%r10,%r8), %rsi
|
||||
; CHECK-NEXT: addq %rsi, %rsi
|
||||
; CHECK-NEXT: addq %rdx, %rsi
|
||||
; CHECK-NEXT: leaq (%r10,%r8), %rbx
|
||||
; CHECK-NEXT: leaq (%rdx,%rbx), %rsi
|
||||
; CHECK-NEXT: addq %rbx, %rsi
|
||||
; CHECK-NEXT: movq X(%rip), %rbx
|
||||
; CHECK-NEXT: addq %r12, %rdi
|
||||
; CHECK-NEXT: addq %rdi, %r9
|
||||
|
@ -54,9 +54,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r9, %rdi
|
||||
; CHECK-NEXT: addq %r14, %rbx
|
||||
; CHECK-NEXT: addq %rdi, %rbx
|
||||
; CHECK-NEXT: leaq (%rdx,%r8), %rdi
|
||||
; CHECK-NEXT: addq %rdi, %rdi
|
||||
; CHECK-NEXT: addq %rsi, %rdi
|
||||
; CHECK-NEXT: leaq (%rdx,%r8), %rax
|
||||
; CHECK-NEXT: leaq (%rsi,%rax), %rdi
|
||||
; CHECK-NEXT: addq %rax, %rdi
|
||||
; CHECK-NEXT: movq X(%rip), %rcx
|
||||
; CHECK-NEXT: addq %r9, %rbx
|
||||
; CHECK-NEXT: addq %rbx, %r10
|
||||
|
@ -67,9 +67,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r10, %rax
|
||||
; CHECK-NEXT: addq %r15, %rcx
|
||||
; CHECK-NEXT: addq %rax, %rcx
|
||||
; CHECK-NEXT: leaq (%rsi,%rdx), %r11
|
||||
; CHECK-NEXT: addq %r11, %r11
|
||||
; CHECK-NEXT: addq %rdi, %r11
|
||||
; CHECK-NEXT: leaq (%rsi,%rdx), %rbx
|
||||
; CHECK-NEXT: leaq (%rdi,%rbx), %r11
|
||||
; CHECK-NEXT: addq %rbx, %r11
|
||||
; CHECK-NEXT: movq X(%rip), %rbx
|
||||
; CHECK-NEXT: addq %r10, %rcx
|
||||
; CHECK-NEXT: addq %rcx, %r8
|
||||
|
@ -80,9 +80,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r8, %rcx
|
||||
; CHECK-NEXT: addq %r12, %rbx
|
||||
; CHECK-NEXT: addq %rcx, %rbx
|
||||
; CHECK-NEXT: leaq (%rdi,%rsi), %r14
|
||||
; CHECK-NEXT: addq %r14, %r14
|
||||
; CHECK-NEXT: addq %r11, %r14
|
||||
; CHECK-NEXT: leaq (%rdi,%rsi), %rax
|
||||
; CHECK-NEXT: leaq (%r11,%rax), %r14
|
||||
; CHECK-NEXT: addq %rax, %r14
|
||||
; CHECK-NEXT: movq X(%rip), %rax
|
||||
; CHECK-NEXT: addq %r8, %rbx
|
||||
; CHECK-NEXT: addq %rbx, %rdx
|
||||
|
@ -93,9 +93,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %rdx, %rbx
|
||||
; CHECK-NEXT: addq %r9, %rax
|
||||
; CHECK-NEXT: addq %rbx, %rax
|
||||
; CHECK-NEXT: leaq (%r11,%rdi), %r9
|
||||
; CHECK-NEXT: addq %r9, %r9
|
||||
; CHECK-NEXT: addq %r14, %r9
|
||||
; CHECK-NEXT: leaq (%r11,%rdi), %rbx
|
||||
; CHECK-NEXT: leaq (%r14,%rbx), %r9
|
||||
; CHECK-NEXT: addq %rbx, %r9
|
||||
; CHECK-NEXT: movq X(%rip), %rbx
|
||||
; CHECK-NEXT: addq %rdx, %rax
|
||||
; CHECK-NEXT: addq %rax, %rsi
|
||||
|
@ -106,9 +106,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %rsi, %rax
|
||||
; CHECK-NEXT: addq %r10, %rbx
|
||||
; CHECK-NEXT: addq %rax, %rbx
|
||||
; CHECK-NEXT: leaq (%r14,%r11), %r10
|
||||
; CHECK-NEXT: addq %r10, %r10
|
||||
; CHECK-NEXT: addq %r9, %r10
|
||||
; CHECK-NEXT: leaq (%r14,%r11), %rax
|
||||
; CHECK-NEXT: leaq (%r9,%rax), %r10
|
||||
; CHECK-NEXT: addq %rax, %r10
|
||||
; CHECK-NEXT: movq X(%rip), %rax
|
||||
; CHECK-NEXT: addq %rsi, %rbx
|
||||
; CHECK-NEXT: addq %rbx, %rdi
|
||||
|
@ -119,9 +119,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %rdi, %rbx
|
||||
; CHECK-NEXT: addq %r8, %rax
|
||||
; CHECK-NEXT: addq %rbx, %rax
|
||||
; CHECK-NEXT: leaq (%r9,%r14), %r8
|
||||
; CHECK-NEXT: addq %r8, %r8
|
||||
; CHECK-NEXT: addq %r10, %r8
|
||||
; CHECK-NEXT: leaq (%r9,%r14), %rbx
|
||||
; CHECK-NEXT: leaq (%r10,%rbx), %r8
|
||||
; CHECK-NEXT: addq %rbx, %r8
|
||||
; CHECK-NEXT: movq X(%rip), %rbx
|
||||
; CHECK-NEXT: addq %rdi, %rax
|
||||
; CHECK-NEXT: addq %rax, %r11
|
||||
|
@ -132,9 +132,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r11, %rax
|
||||
; CHECK-NEXT: addq %rdx, %rbx
|
||||
; CHECK-NEXT: addq %rax, %rbx
|
||||
; CHECK-NEXT: leaq (%r10,%r9), %r15
|
||||
; CHECK-NEXT: addq %r15, %r15
|
||||
; CHECK-NEXT: addq %r8, %r15
|
||||
; CHECK-NEXT: leaq (%r10,%r9), %rax
|
||||
; CHECK-NEXT: leaq (%r8,%rax), %r15
|
||||
; CHECK-NEXT: addq %rax, %r15
|
||||
; CHECK-NEXT: movq X(%rip), %rax
|
||||
; CHECK-NEXT: addq %r11, %rbx
|
||||
; CHECK-NEXT: addq %rbx, %r14
|
||||
|
@ -145,9 +145,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r14, %rbx
|
||||
; CHECK-NEXT: addq %rsi, %rax
|
||||
; CHECK-NEXT: addq %rbx, %rax
|
||||
; CHECK-NEXT: leaq (%r8,%r10), %rsi
|
||||
; CHECK-NEXT: addq %rsi, %rsi
|
||||
; CHECK-NEXT: addq %r15, %rsi
|
||||
; CHECK-NEXT: leaq (%r8,%r10), %rbx
|
||||
; CHECK-NEXT: leaq (%r15,%rbx), %rsi
|
||||
; CHECK-NEXT: addq %rbx, %rsi
|
||||
; CHECK-NEXT: movq X(%rip), %rbx
|
||||
; CHECK-NEXT: addq %r14, %rax
|
||||
; CHECK-NEXT: addq %rax, %r9
|
||||
|
@ -158,9 +158,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r9, %rax
|
||||
; CHECK-NEXT: addq %rdi, %rbx
|
||||
; CHECK-NEXT: addq %rax, %rbx
|
||||
; CHECK-NEXT: leaq (%r15,%r8), %r12
|
||||
; CHECK-NEXT: addq %r12, %r12
|
||||
; CHECK-NEXT: addq %rsi, %r12
|
||||
; CHECK-NEXT: leaq (%r15,%r8), %rax
|
||||
; CHECK-NEXT: leaq (%rsi,%rax), %r12
|
||||
; CHECK-NEXT: addq %rax, %r12
|
||||
; CHECK-NEXT: movq X(%rip), %rcx
|
||||
; CHECK-NEXT: addq %r9, %rbx
|
||||
; CHECK-NEXT: addq %rbx, %r10
|
||||
|
@ -171,9 +171,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r10, %rax
|
||||
; CHECK-NEXT: addq %r11, %rcx
|
||||
; CHECK-NEXT: addq %rax, %rcx
|
||||
; CHECK-NEXT: leaq (%rsi,%r15), %rax
|
||||
; CHECK-NEXT: addq %rax, %rax
|
||||
; CHECK-NEXT: addq %r12, %rax
|
||||
; CHECK-NEXT: leaq (%rsi,%r15), %rbx
|
||||
; CHECK-NEXT: leaq (%r12,%rbx), %rax
|
||||
; CHECK-NEXT: addq %rbx, %rax
|
||||
; CHECK-NEXT: movq X(%rip), %rbx
|
||||
; CHECK-NEXT: addq %r10, %rcx
|
||||
; CHECK-NEXT: addq %rcx, %r8
|
||||
|
@ -184,9 +184,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r8, %rcx
|
||||
; CHECK-NEXT: addq %r14, %rbx
|
||||
; CHECK-NEXT: addq %rcx, %rbx
|
||||
; CHECK-NEXT: leaq (%r12,%rsi), %rcx
|
||||
; CHECK-NEXT: addq %rcx, %rcx
|
||||
; CHECK-NEXT: addq %rax, %rcx
|
||||
; CHECK-NEXT: leaq (%r12,%rsi), %rdx
|
||||
; CHECK-NEXT: leaq (%rax,%rdx), %rcx
|
||||
; CHECK-NEXT: addq %rdx, %rcx
|
||||
; CHECK-NEXT: movq X(%rip), %rdx
|
||||
; CHECK-NEXT: addq %r8, %rbx
|
||||
; CHECK-NEXT: addq %rbx, %r15
|
||||
|
@ -197,9 +197,9 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r15, %rbx
|
||||
; CHECK-NEXT: addq %r9, %rdx
|
||||
; CHECK-NEXT: addq %rbx, %rdx
|
||||
; CHECK-NEXT: leaq (%rax,%r12), %rbx
|
||||
; CHECK-NEXT: addq %rbx, %rbx
|
||||
; CHECK-NEXT: addq %rcx, %rbx
|
||||
; CHECK-NEXT: leaq (%rax,%r12), %r9
|
||||
; CHECK-NEXT: leaq (%rcx,%r9), %rbx
|
||||
; CHECK-NEXT: addq %r9, %rbx
|
||||
; CHECK-NEXT: addq %r15, %rdx
|
||||
; CHECK-NEXT: addq %rdx, %rsi
|
||||
; CHECK-NEXT: addq %rcx, %rbx
|
||||
|
@ -211,12 +211,12 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %rsi, %rdi
|
||||
; CHECK-NEXT: addq %rdi, %rdx
|
||||
; CHECK-NEXT: addq %rax, %rcx
|
||||
; CHECK-NEXT: addq %rcx, %rcx
|
||||
; CHECK-NEXT: addq %rbx, %rcx
|
||||
; CHECK-NEXT: addq %rbx, %rcx
|
||||
; CHECK-NEXT: leaq (%rbx,%rcx), %rdi
|
||||
; CHECK-NEXT: addq %rcx, %rdi
|
||||
; CHECK-NEXT: addq %rbx, %rdi
|
||||
; CHECK-NEXT: addq %rsi, %rdx
|
||||
; CHECK-NEXT: addq %rdx, %r12
|
||||
; CHECK-NEXT: addq %rdx, %rcx
|
||||
; CHECK-NEXT: addq %rdx, %rdi
|
||||
; CHECK-NEXT: addq %r15, %rsi
|
||||
; CHECK-NEXT: movq X(%rip), %rax
|
||||
; CHECK-NEXT: bswapq %rax
|
||||
|
@ -225,7 +225,7 @@ define fastcc i64 @foo() nounwind {
|
|||
; CHECK-NEXT: addq %r12, %rsi
|
||||
; CHECK-NEXT: addq %rsi, %rax
|
||||
; CHECK-NEXT: addq %r12, %rax
|
||||
; CHECK-NEXT: addq %rcx, %rax
|
||||
; CHECK-NEXT: addq %rdi, %rax
|
||||
; CHECK-NEXT: popq %rbx
|
||||
; CHECK-NEXT: popq %r12
|
||||
; CHECK-NEXT: popq %r14
|
||||
|
|
|
@ -11,14 +11,15 @@
|
|||
; subl %edx, %ecx
|
||||
; subl %eax, %ecx
|
||||
|
||||
; TODO: replace lea with sub.
|
||||
; C - (A + B) --> C - A - B
|
||||
define i32 @test1(i32* %p, i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: subl %edx, %ecx
|
||||
; CHECK-NEXT: subl %eax, %ecx
|
||||
; CHECK-NEXT: leal (%rdx,%rax), %esi
|
||||
; CHECK-NEXT: subl %esi, %ecx
|
||||
; CHECK-NEXT: movl %ecx, (%rdi)
|
||||
; CHECK-NEXT: subl %edx, %eax
|
||||
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
|
@ -31,15 +32,16 @@ entry:
|
|||
ret i32 %sub1
|
||||
}
|
||||
|
||||
; TODO: replace lea with add.
|
||||
; (A + B) + C --> C + A + B
|
||||
define i32 @test2(i32* %p, i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: addl %eax, %ecx
|
||||
; CHECK-NEXT: addl %edx, %ecx
|
||||
; CHECK-NEXT: movl %ecx, (%rdi)
|
||||
; CHECK-NEXT: leal (%rax,%rdx), %esi
|
||||
; CHECK-NEXT: addl %ecx, %esi
|
||||
; CHECK-NEXT: movl %esi, (%rdi)
|
||||
; CHECK-NEXT: subl %edx, %eax
|
||||
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -51,15 +53,16 @@ entry:
|
|||
ret i32 %sub1
|
||||
}
|
||||
|
||||
; TODO: replace lea with add.
|
||||
; C + (A + B) --> C + A + B
|
||||
define i32 @test3(i32* %p, i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: addl %eax, %ecx
|
||||
; CHECK-NEXT: addl %edx, %ecx
|
||||
; CHECK-NEXT: movl %ecx, (%rdi)
|
||||
; CHECK-NEXT: leal (%rax,%rdx), %esi
|
||||
; CHECK-NEXT: addl %ecx, %esi
|
||||
; CHECK-NEXT: movl %esi, (%rdi)
|
||||
; CHECK-NEXT: subl %edx, %eax
|
||||
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -92,12 +95,13 @@ entry:
|
|||
ret i32 %sub1
|
||||
}
|
||||
|
||||
; TODO: replace lea with sub.
|
||||
define i64 @test5(i64* %p, i64 %a, i64 %b, i64 %c) {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq (%rdi), %rax
|
||||
; CHECK-NEXT: subq %rdx, %rcx
|
||||
; CHECK-NEXT: subq %rax, %rcx
|
||||
; CHECK-NEXT: leaq (%rdx,%rax), %rsi
|
||||
; CHECK-NEXT: subq %rsi, %rcx
|
||||
; CHECK-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-NEXT: subq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -110,13 +114,14 @@ entry:
|
|||
ret i64 %sub1
|
||||
}
|
||||
|
||||
; TODO: replace lea with add.
|
||||
define i64 @test6(i64* %p, i64 %a, i64 %b, i64 %c) {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq (%rdi), %rax
|
||||
; CHECK-NEXT: addq %rdx, %rcx
|
||||
; CHECK-NEXT: addq %rax, %rcx
|
||||
; CHECK-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-NEXT: leaq (%rdx,%rax), %rsi
|
||||
; CHECK-NEXT: addq %rcx, %rsi
|
||||
; CHECK-NEXT: movq %rsi, (%rdi)
|
||||
; CHECK-NEXT: subq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
@ -128,13 +133,14 @@ entry:
|
|||
ret i64 %sub1
|
||||
}
|
||||
|
||||
; TODO: replace lea with add.
|
||||
define i64 @test7(i64* %p, i64 %a, i64 %b, i64 %c) {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq (%rdi), %rax
|
||||
; CHECK-NEXT: addq %rdx, %rcx
|
||||
; CHECK-NEXT: addq %rax, %rcx
|
||||
; CHECK-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-NEXT: leaq (%rdx,%rax), %rsi
|
||||
; CHECK-NEXT: addq %rcx, %rsi
|
||||
; CHECK-NEXT: movq %rsi, (%rdi)
|
||||
; CHECK-NEXT: subq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
@ -146,39 +152,3 @@ entry:
|
|||
ret i64 %sub1
|
||||
}
|
||||
|
||||
; The sub instruction generated flags is used by following branch,
|
||||
; so it should not be transformed.
|
||||
define i64 @test8(i64* %p, i64 %a, i64 %b, i64 %c) {
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq (%rdi), %rax
|
||||
; CHECK-NEXT: leaq (%rdx,%rax), %rsi
|
||||
; CHECK-NEXT: subq %rsi, %rcx
|
||||
; CHECK-NEXT: ja .LBB7_2
|
||||
; CHECK-NEXT: # %bb.1: # %then
|
||||
; CHECK-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-NEXT: subq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB7_2: # %else
|
||||
; CHECK-NEXT: movq $0, (%rdi)
|
||||
; CHECK-NEXT: subq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%ld = load i64, i64* %p, align 8
|
||||
%0 = add i64 %b, %ld
|
||||
%sub = sub i64 %c, %0
|
||||
%cond = icmp ule i64 %c, %0
|
||||
br i1 %cond, label %then, label %else
|
||||
|
||||
then:
|
||||
store i64 %sub, i64* %p, align 8
|
||||
br label %endif
|
||||
|
||||
else:
|
||||
store i64 0, i64* %p, align 8
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
%sub1 = sub i64 %ld, %b
|
||||
ret i64 %sub1
|
||||
}
|
||||
|
|
|
@ -53,9 +53,9 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
|
|||
; X86-NEXT: addl %ecx, %edx
|
||||
; X86-NEXT: kmovw %k1, %ecx
|
||||
; X86-NEXT: addl %edi, %ecx
|
||||
; X86-NEXT: addl %ecx, %eax
|
||||
; X86-NEXT: addl %edx, %eax
|
||||
; X86-NEXT: movw %ax, (%esi)
|
||||
; X86-NEXT: addl %eax, %ecx
|
||||
; X86-NEXT: addl %edx, %ecx
|
||||
; X86-NEXT: movw %cx, (%esi)
|
||||
; X86-NEXT: leal -8(%ebp), %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
|
@ -107,10 +107,10 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
|
|||
; X64-NEXT: kmovw %k1, %ebx
|
||||
; X64-NEXT: addl %edi, %eax
|
||||
; X64-NEXT: addl %ecx, %edx
|
||||
; X64-NEXT: addl %ebx, %eax
|
||||
; X64-NEXT: addl %esi, %eax
|
||||
; X64-NEXT: addl %edx, %eax
|
||||
; X64-NEXT: movw %ax, (%r14)
|
||||
; X64-NEXT: leal (%rbx,%rsi), %ecx
|
||||
; X64-NEXT: addl %eax, %ecx
|
||||
; X64-NEXT: addl %edx, %ecx
|
||||
; X64-NEXT: movw %cx, (%r14)
|
||||
; X64-NEXT: leaq -16(%rbp), %rsp
|
||||
; X64-NEXT: popq %rbx
|
||||
; X64-NEXT: popq %r14
|
||||
|
|
Loading…
Reference in New Issue