forked from OSchip/llvm-project
[FastISel][X86] Add MachineMemOperand to load/store instructions.
This commit adds MachineMemOperands to load and store instructions. This allows the peephole optimizer to fold load instructions. Unfortunatelly the peephole optimizer currently doesn't run at -O0. llvm-svn: 210858
This commit is contained in:
parent
09b61ebcbe
commit
349777d3ea
|
@ -377,6 +377,9 @@ protected:
|
|||
/// Test whether the given value has exactly one use.
|
||||
bool hasTrivialKill(const Value *V) const;
|
||||
|
||||
/// \brief Create a machine mem operand from the given instruction.
|
||||
MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const;
|
||||
|
||||
private:
|
||||
bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
|
||||
|
||||
|
|
|
@ -1737,3 +1737,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
|
|||
return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
|
||||
}
|
||||
|
||||
MachineMemOperand *
|
||||
FastISel::createMachineMemOperandFor(const Instruction *I) const {
|
||||
const Value *Ptr;
|
||||
Type *ValTy;
|
||||
unsigned Alignment;
|
||||
unsigned Flags;
|
||||
bool IsVolatile;
|
||||
|
||||
if (const auto *LI = dyn_cast<LoadInst>(I)) {
|
||||
Alignment = LI->getAlignment();
|
||||
IsVolatile = LI->isVolatile();
|
||||
Flags = MachineMemOperand::MOLoad;
|
||||
Ptr = LI->getPointerOperand();
|
||||
ValTy = LI->getType();
|
||||
} else if (const auto *SI = dyn_cast<StoreInst>(I)) {
|
||||
Alignment = SI->getAlignment();
|
||||
IsVolatile = SI->isVolatile();
|
||||
Flags = MachineMemOperand::MOStore;
|
||||
Ptr = SI->getPointerOperand();
|
||||
ValTy = SI->getValueOperand()->getType();
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
|
||||
bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
|
||||
const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
|
||||
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
|
||||
|
||||
if (Alignment == 0) // Ensure that codegen never sees alignment 0.
|
||||
Alignment = DL.getABITypeAlignment(ValTy);
|
||||
|
||||
unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);
|
||||
|
||||
if (IsVolatile)
|
||||
Flags |= MachineMemOperand::MOVolatile;
|
||||
if (IsNonTemporal)
|
||||
Flags |= MachineMemOperand::MONonTemporal;
|
||||
if (IsInvariant)
|
||||
Flags |= MachineMemOperand::MOInvariant;
|
||||
|
||||
return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
|
||||
Alignment, TBAAInfo, Ranges);
|
||||
}
|
||||
|
|
|
@ -78,12 +78,14 @@ public:
|
|||
private:
|
||||
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
|
||||
|
||||
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
|
||||
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
|
||||
unsigned &ResultReg);
|
||||
|
||||
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
|
||||
bool Aligned = false);
|
||||
bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
|
||||
bool Aligned = false);
|
||||
MachineMemOperand *MMO = nullptr, bool Aligned = false);
|
||||
bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
|
||||
const X86AddressMode &AM,
|
||||
MachineMemOperand *MMO = nullptr, bool Aligned = false);
|
||||
|
||||
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
|
||||
unsigned &ResultReg);
|
||||
|
@ -180,7 +182,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
|
|||
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
|
||||
/// Return true and the result register by reference if it is possible.
|
||||
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
||||
unsigned &ResultReg) {
|
||||
MachineMemOperand *MMO, unsigned &ResultReg) {
|
||||
// Get opcode and regclass of the output for the given load instruction.
|
||||
unsigned Opc = 0;
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
|
@ -228,8 +230,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
|||
}
|
||||
|
||||
ResultReg = createResultReg(RC);
|
||||
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
|
||||
DbgLoc, TII.get(Opc), ResultReg), AM);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
|
||||
addFullAddress(MIB, AM);
|
||||
if (MMO)
|
||||
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -237,9 +242,9 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
|||
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
|
||||
/// and a displacement offset, or a GlobalAddress,
|
||||
/// i.e. V. Return true if it is possible.
|
||||
bool
|
||||
X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
|
||||
const X86AddressMode &AM, bool Aligned) {
|
||||
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
|
||||
const X86AddressMode &AM,
|
||||
MachineMemOperand *MMO, bool Aligned) {
|
||||
// Get opcode and regclass of the output for the given store instruction.
|
||||
unsigned Opc = 0;
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
|
@ -249,7 +254,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
|
|||
// Mask out all but lowest bit.
|
||||
unsigned AndResult = createResultReg(&X86::GR8RegClass);
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
||||
TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
|
||||
TII.get(X86::AND8ri), AndResult)
|
||||
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
|
||||
ValReg = AndResult;
|
||||
}
|
||||
// FALLTHROUGH, handling i1 as i8.
|
||||
|
@ -288,13 +294,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
|
|||
break;
|
||||
}
|
||||
|
||||
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
|
||||
DbgLoc, TII.get(Opc)), AM).addReg(ValReg);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
|
||||
addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
|
||||
if (MMO)
|
||||
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
||||
const X86AddressMode &AM, bool Aligned) {
|
||||
const X86AddressMode &AM,
|
||||
MachineMemOperand *MMO, bool Aligned) {
|
||||
// Handle 'null' like i32/i64 0.
|
||||
if (isa<ConstantPointerNull>(Val))
|
||||
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
|
||||
|
@ -317,10 +328,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
|||
}
|
||||
|
||||
if (Opc) {
|
||||
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
|
||||
DbgLoc, TII.get(Opc)), AM)
|
||||
.addImm(Signed ? (uint64_t) CI->getSExtValue() :
|
||||
CI->getZExtValue());
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
|
||||
addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
|
||||
: CI->getZExtValue());
|
||||
if (MMO)
|
||||
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -329,7 +342,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
|||
if (ValReg == 0)
|
||||
return false;
|
||||
|
||||
return X86FastEmitStore(VT, ValReg, AM, Aligned);
|
||||
bool ValKill = hasTrivialKill(Val);
|
||||
return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
|
||||
}
|
||||
|
||||
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
|
||||
|
@ -740,19 +754,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
|
|||
if (S->isAtomic())
|
||||
return false;
|
||||
|
||||
unsigned SABIAlignment =
|
||||
DL.getABITypeAlignment(S->getValueOperand()->getType());
|
||||
bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
|
||||
const Value *Val = S->getValueOperand();
|
||||
const Value *Ptr = S->getPointerOperand();
|
||||
|
||||
MVT VT;
|
||||
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
|
||||
if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
|
||||
return false;
|
||||
|
||||
unsigned Alignment = S->getAlignment();
|
||||
unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
|
||||
if (Alignment == 0) // Ensure that codegen never sees alignment 0
|
||||
Alignment = ABIAlignment;
|
||||
bool Aligned = Alignment >= ABIAlignment;
|
||||
|
||||
X86AddressMode AM;
|
||||
if (!X86SelectAddress(I->getOperand(1), AM))
|
||||
if (!X86SelectAddress(Ptr, AM))
|
||||
return false;
|
||||
|
||||
return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
|
||||
return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
|
||||
}
|
||||
|
||||
/// X86SelectRet - Select and emit code to implement ret instructions.
|
||||
|
@ -887,25 +906,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
|
|||
|
||||
/// X86SelectLoad - Select and emit code to implement load instructions.
|
||||
///
|
||||
bool X86FastISel::X86SelectLoad(const Instruction *I) {
|
||||
bool X86FastISel::X86SelectLoad(const Instruction *I) {
|
||||
const LoadInst *LI = cast<LoadInst>(I);
|
||||
|
||||
// Atomic loads need special handling.
|
||||
if (cast<LoadInst>(I)->isAtomic())
|
||||
if (LI->isAtomic())
|
||||
return false;
|
||||
|
||||
MVT VT;
|
||||
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
|
||||
if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
|
||||
return false;
|
||||
|
||||
const Value *Ptr = LI->getPointerOperand();
|
||||
|
||||
X86AddressMode AM;
|
||||
if (!X86SelectAddress(I->getOperand(0), AM))
|
||||
if (!X86SelectAddress(Ptr, AM))
|
||||
return false;
|
||||
|
||||
unsigned ResultReg = 0;
|
||||
if (X86FastEmitLoad(VT, AM, ResultReg)) {
|
||||
UpdateValueMap(I, ResultReg);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
|
||||
return false;
|
||||
|
||||
UpdateValueMap(I, ResultReg);
|
||||
return true;
|
||||
}
|
||||
|
||||
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
|
||||
|
@ -1624,8 +1647,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
|
|||
}
|
||||
|
||||
unsigned Reg;
|
||||
bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
|
||||
RV &= X86FastEmitStore(VT, Reg, DestAM);
|
||||
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
|
||||
RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
|
||||
assert(RV && "Failed to emit load or store??");
|
||||
|
||||
unsigned Size = VT.getSizeInBits()/8;
|
||||
|
@ -2322,7 +2345,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
|
|||
if (!X86FastEmitStore(ArgVT, ArgVal, AM))
|
||||
return false;
|
||||
} else {
|
||||
if (!X86FastEmitStore(ArgVT, Arg, AM))
|
||||
if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -2719,8 +2742,9 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
|
|||
|
||||
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
|
||||
const LoadInst *LI) {
|
||||
const Value *Ptr = LI->getPointerOperand();
|
||||
X86AddressMode AM;
|
||||
if (!X86SelectAddress(LI->getOperand(0), AM))
|
||||
if (!X86SelectAddress(Ptr, AM))
|
||||
return false;
|
||||
|
||||
const X86InstrInfo &XII = (const X86InstrInfo&)TII;
|
||||
|
@ -2728,13 +2752,18 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
|
|||
unsigned Size = DL.getTypeAllocSize(LI->getType());
|
||||
unsigned Alignment = LI->getAlignment();
|
||||
|
||||
if (Alignment == 0) // Ensure that codegen never sees alignment 0
|
||||
Alignment = DL.getABITypeAlignment(LI->getType());
|
||||
|
||||
SmallVector<MachineOperand, 8> AddrOps;
|
||||
AM.getFullAddress(AddrOps);
|
||||
|
||||
MachineInstr *Result =
|
||||
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
|
||||
if (!Result) return false;
|
||||
if (!Result)
|
||||
return false;
|
||||
|
||||
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
|
||||
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
|
||||
MI->eraseFromParent();
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
|
||||
; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin | FileCheck %s
|
||||
|
||||
define i64 @fold_load(i64* %a, i64 %b) {
|
||||
; CHECK-LABEL: fold_load
|
||||
; CHECK: addq (%rdi), %rsi
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
%1 = load i64* %a, align 8
|
||||
%2 = add i64 %1, %b
|
||||
ret i64 %2
|
||||
}
|
||||
|
Loading…
Reference in New Issue