From db65a5b776f20795df41741bc7042d6b61070886 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 19 Mar 2019 18:27:18 +0000 Subject: [PATCH] Allow unordered loads to be considered invariant in CodeGen The actual code change is fairly straight forward, but exercising it isn't. First, it turned out we weren't adding the appropriate flags in SelectionDAG. Second, it turned out that we've got some optimization gaps, so obvious test cases don't work. My first attempt (in atomic-unordered.ll) points out a deficiency in our peephole-opt folding logic which I plan to fix separately. Instead, I'm exercising this through MachineLICM. Differential Revision: https://reviews.llvm.org/D59375 llvm-svn: 356494 --- llvm/lib/CodeGen/MachineInstr.cpp | 8 ++++--- .../SelectionDAG/SelectionDAGBuilder.cpp | 5 +++++ llvm/test/CodeGen/X86/atomic-unordered.ll | 4 ++-- llvm/test/CodeGen/X86/hoist-invariant-load.ll | 22 +++++++++++++------ 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 17bd0f38964f..26d58340b618 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1312,9 +1312,11 @@ bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo(); for (MachineMemOperand *MMO : memoperands()) { - if (MMO->isVolatile()) return false; - // TODO: Figure out whether isAtomic is really necessary (see D57601). - if (MMO->isAtomic()) return false; + if (!MMO->isUnordered()) + // If the memory operand has ordering side effects, we can't move the + // instruction. Such an instruction is technically an invariant load, + // but the caller code would need to be updated to expect that. 
+ return false; if (MMO->isStore()) return false; if (MMO->isInvariant() && MMO->isDereferenceable()) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3aedee8db0bc..6d7f2840adfb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4564,6 +4564,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { auto Flags = MachineMemOperand::MOLoad; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; + if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) + Flags |= MachineMemOperand::MOInvariant; + if (isDereferenceablePointer(I.getPointerOperand(), DAG.getDataLayout())) + Flags |= MachineMemOperand::MODereferenceable; + Flags |= TLI.getMMOFlags(I); MachineMemOperand *MMO = diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 105aa0f23e79..a85ddf754161 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -2546,7 +2546,7 @@ define i64 @fold_constant_fence(i64 %arg) { ret i64 %ret } -define i64 @fold_invariant_clobber(i64* %p, i64 %arg) { +define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O0-LABEL: fold_invariant_clobber: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax @@ -2567,7 +2567,7 @@ define i64 @fold_invariant_clobber(i64* %p, i64 %arg) { } -define i64 @fold_invariant_fence(i64* %p, i64 %arg) { +define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O0-LABEL: fold_invariant_fence: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rdi diff --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll index de68234d725e..b2b26d9dc0ea 100644 --- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll +++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll @@ -73,27 +73,35 @@ define 
void @test_unordered(i8* %x) uwtable ssp { ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %rbx, -32 -; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movl $10000, %ebp ## imm = 0x2710 -; CHECK-NEXT: movq _objc_msgSend@{{.*}}(%rip), %r14 +; CHECK-NEXT: movq {{.*}}(%rip), %r14 +; CHECK-NEXT: movq _objc_msgSend@{{.*}}(%rip), %r15 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB1_1: ## %for.body ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq {{.*}}(%rip), %rsi ; CHECK-NEXT: movq %rbx, %rdi -; CHECK-NEXT: callq *%r14 +; CHECK-NEXT: movq %r14, %rsi +; CHECK-NEXT: callq *%r15 ; CHECK-NEXT: decl %ebp ; CHECK-NEXT: jne LBB1_1 ; CHECK-NEXT: ## %bb.2: ## %for.end +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq entry: