[CodeGen] Don't require AA in SDAGISel at -O0.

Before r247167, the pass manager builder controlled which AA
implementations were used, exporting them all in the AliasAnalysis
analysis group.

Now, AAResultsWrapperPass always uses BasicAA, but still uses other AA
implementations if made available in the pass pipeline.

But regardless, SDAGISel is required at O0, and really doesn't need to
be doing fancy optimizations based on useful AA results.

Don't require AA at CodeGenOpt::None, and only use it otherwise.

This does have a functional impact (and one testcase is pessimized
because we can't reuse a load).  But I think that's desirable no matter
what.

Note that this alone doesn't result in less DT computations: TwoAddress
was previously able to reuse the DT we computed for SDAG.  That will be
fixed separately.

Differential Revision: https://reviews.llvm.org/D32766

llvm-svn: 302611
This commit is contained in:
Ahmed Bougacha 2017-05-10 00:39:30 +00:00
parent 8c358e3016
commit 604526fe87
7 changed files with 75 additions and 68 deletions

View File

@ -406,7 +406,7 @@ public:
/// certain types of nodes together, or eliminating superfluous nodes. The
/// Level argument controls whether Combine is allowed to produce nodes and
/// types that are illegal on the target.
void Combine(CombineLevel Level, AliasAnalysis &AA,
void Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel);
/// This transforms the SelectionDAG into a SelectionDAG that

View File

@ -114,7 +114,7 @@ namespace {
SmallPtrSet<SDNode *, 32> CombinedNodes;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
AliasAnalysis *AA;
/// When an instruction is simplified, add all users of the instruction to
/// the work lists because they might get more simplified now.
@ -496,9 +496,9 @@ namespace {
SDValue distributeTruncateThroughAnd(SDNode *N);
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
MaximumLegalStoreInBits = 0;
@ -16435,17 +16435,17 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
UseAA = false;
#endif
if (UseAA &&
if (UseAA && AA &&
Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
AliasResult AAResult =
AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
if (AAResult == NoAlias)
return false;
}
@ -16659,7 +16659,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
}
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel) {
/// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);

View File

@ -811,9 +811,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
AA = &aa;
AA = aa;
GFI = gfi;
LibInfo = li;
DL = &DAG.getDataLayout();
@ -3423,7 +3423,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (AA->pointsToConstantMemory(MemoryLocation(
else if (AA && AA->pointsToConstantMemory(MemoryLocation(
SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
@ -3535,8 +3535,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
assert(!AA->pointsToConstantMemory(MemoryLocation(
SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) &&
assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
@ -3817,7 +3817,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation(
bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
@ -3861,7 +3861,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
if (UniformBase &&
AA->pointsToConstantMemory(MemoryLocation(
AA && AA->pointsToConstantMemory(MemoryLocation(
BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
@ -5994,7 +5994,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything.
if (Builder.AA->pointsToConstantMemory(PtrVal)) {
if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode();
ConstantMemory = true;
} else {

View File

@ -604,11 +604,11 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
: CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
DAG(dag), FuncInfo(funcinfo),
DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
HasTailCall(false) {
}
void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li);
/// Clear out the current SelectionDAG and the associated state and prepare

View File

@ -300,7 +300,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
FuncInfo(new FunctionLoweringInfo()),
CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
AA(), GFI(),
OptLevel(OL),
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
@ -318,7 +318,8 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
if (OptLevel != CodeGenOpt::None)
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
AU.addPreserved<StackProtector>();
@ -395,7 +396,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
@ -407,12 +407,22 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
// into account). That's unfortunate but OK because it just means we won't
// ask for passes that have been required anyway.
if (UseMBPI && OptLevel != CodeGenOpt::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
SDB->init(GFI, *AA, LibInfo);
if (OptLevel != CodeGenOpt::None)
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
else
AA = nullptr;
SDB->init(GFI, AA, LibInfo);
MF->setHasInlineAsm(false);
@ -716,7 +726,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@ -748,7 +758,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@ -782,7 +792,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@ -808,7 +818,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber

View File

@ -30,15 +30,15 @@
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: X86 DAG->DAG Instruction Selection
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Expand ISel Pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Bundle Machine CFG Edges

View File

@ -30,25 +30,24 @@ define void @foo() {
; X86-O0-NEXT: subl $12, %esp
; X86-O0-NEXT: .Lcfi0:
; X86-O0-NEXT: .cfi_def_cfa_offset 16
; X86-O0-NEXT: movzbl c, %eax
; X86-O0-NEXT: testl %eax, %eax
; X86-O0-NEXT: setne %cl
; X86-O0-NEXT: movl %eax, %edx
; X86-O0-NEXT: movb %dl, %ch
; X86-O0-NEXT: testb %ch, %ch
; X86-O0-NEXT: movb c, %al
; X86-O0-NEXT: testb %al, %al
; X86-O0-NEXT: setne {{[0-9]+}}(%esp)
; X86-O0-NEXT: movzbl %cl, %edx
; X86-O0-NEXT: subl %eax, %edx
; X86-O0-NEXT: setle %cl
; X86-O0-NEXT: # implicit-def: %EAX
; X86-O0-NEXT: movb %cl, %al
; X86-O0-NEXT: andl $1, %eax
; X86-O0-NEXT: kmovd %eax, %k0
; X86-O0-NEXT: kmovd %k0, %eax
; X86-O0-NEXT: movzbl c, %ecx
; X86-O0-NEXT: testl %ecx, %ecx
; X86-O0-NEXT: setne %al
; X86-O0-NEXT: movzbl %al, %edx
; X86-O0-NEXT: subl %ecx, %edx
; X86-O0-NEXT: setle %al
; X86-O0-NEXT: # implicit-def: %ECX
; X86-O0-NEXT: movb %al, %cl
; X86-O0-NEXT: andb $1, %cl
; X86-O0-NEXT: movzbl %cl, %eax
; X86-O0-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-O0-NEXT: andl $1, %ecx
; X86-O0-NEXT: kmovd %ecx, %k0
; X86-O0-NEXT: kmovd %k0, %ecx
; X86-O0-NEXT: movb %cl, %al
; X86-O0-NEXT: andb $1, %al
; X86-O0-NEXT: movzbl %al, %ecx
; X86-O0-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-O0-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-O0-NEXT: addl $12, %esp
; X86-O0-NEXT: retl
@ -69,27 +68,25 @@ define void @foo() {
;
; X64-O0-LABEL: foo:
; X64-O0: # BB#0: # %entry
; X64-O0-NEXT: movzbl {{.*}}(%rip), %eax
; X64-O0-NEXT: movl %eax, %ecx
; X64-O0-NEXT: movb %cl, %dl
; X64-O0-NEXT: movl %ecx, %eax
; X64-O0-NEXT: testq %rcx, %rcx
; X64-O0-NEXT: setne %sil
; X64-O0-NEXT: testb %dl, %dl
; X64-O0-NEXT: movb {{.*}}(%rip), %al
; X64-O0-NEXT: testb %al, %al
; X64-O0-NEXT: setne -{{[0-9]+}}(%rsp)
; X64-O0-NEXT: movzbl %sil, %edi
; X64-O0-NEXT: subl %eax, %edi
; X64-O0-NEXT: setle %dl
; X64-O0-NEXT: # implicit-def: %EAX
; X64-O0-NEXT: movb %dl, %al
; X64-O0-NEXT: andl $1, %eax
; X64-O0-NEXT: kmovd %eax, %k0
; X64-O0-NEXT: kmovd %k0, %eax
; X64-O0-NEXT: movb %al, %dl
; X64-O0-NEXT: andb $1, %dl
; X64-O0-NEXT: movzbl %dl, %eax
; X64-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-O0-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill
; X64-O0-NEXT: movzbl {{.*}}(%rip), %ecx
; X64-O0-NEXT: testl %ecx, %ecx
; X64-O0-NEXT: setne %al
; X64-O0-NEXT: movzbl %al, %edx
; X64-O0-NEXT: subl %ecx, %edx
; X64-O0-NEXT: setle %al
; X64-O0-NEXT: # implicit-def: %ECX
; X64-O0-NEXT: movb %al, %cl
; X64-O0-NEXT: andl $1, %ecx
; X64-O0-NEXT: kmovd %ecx, %k0
; X64-O0-NEXT: kmovd %k0, %ecx
; X64-O0-NEXT: movb %cl, %al
; X64-O0-NEXT: andb $1, %al
; X64-O0-NEXT: movzbl %al, %ecx
; X64-O0-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-O0-NEXT: movl %edx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; X64-O0-NEXT: retq
entry:
%a = alloca i8, align 1