[InstCombine] Optimize `atomicrmw <op>, 0` into `load atomic` when possible
This commit teaches InstCombine to replace an atomicrmw operation with a simple atomic load when possible. For a given `atomicrmw <op>`, this is possible when:

1. The ordering of that operation is compatible with a load (i.e., anything that doesn't have release semantics).
2. `<op>` does not modify the value being stored.

Differential Revision: https://reviews.llvm.org/D57854

llvm-svn: 353471
parent
82bf8e82c9
commit
96f54de8ff
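For context, here is a minimal sketch (mine, not part of the commit) of the kind of source code this combine improves: `fetch_add` with a zero operand is an idiomatic way to read an atomic through a read-modify-write, and after this change the resulting IR can collapse to a plain atomic load when the ordering allows it.

#include <atomic>

// Lowers to `atomicrmw add i32* %ptr, i32 0 monotonic`, which this combine
// rewrites to `load atomic i32, i32* %ptr monotonic`.
int read_via_rmw(std::atomic<int> &Counter) {
  return Counter.fetch_add(0, std::memory_order_relaxed);
}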
llvm/lib/Transforms/InstCombine/CMakeLists.txt
@@ -5,6 +5,7 @@ add_public_tablegen_target(InstCombineTableGen)
 add_llvm_library(LLVMInstCombine
   InstructionCombining.cpp
   InstCombineAddSub.cpp
+  InstCombineAtomicRMW.cpp
   InstCombineAndOrXor.cpp
   InstCombineCalls.cpp
   InstCombineCasts.cpp
llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp (new file)
@@ -0,0 +1,48 @@
//===- InstCombineAtomicRMW.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for atomic rmw instructions.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
  switch (RMWI.getOperation()) {
  default:
    break;
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
    // Replace atomicrmw <op> addr, 0 => load atomic addr.

    // Volatile RMWs perform a load and a store, we cannot replace
    // this by just a load.
    if (RMWI.isVolatile())
      break;

    auto *CI = dyn_cast<ConstantInt>(RMWI.getValOperand());
    if (!CI || !CI->isZero())
      break;
    // Check if the required ordering is compatible with an
    // atomic load.
    AtomicOrdering Ordering = RMWI.getOrdering();
    assert(Ordering != AtomicOrdering::NotAtomic &&
           Ordering != AtomicOrdering::Unordered &&
           "AtomicRMWs don't make sense with Unordered or NotAtomic");
    if (Ordering != AtomicOrdering::Acquire &&
        Ordering != AtomicOrdering::Monotonic)
      break;
    LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
    Load->setAtomic(Ordering, RMWI.getSyncScopeID());
    return Load;
  }
  return nullptr;
}
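The three handled opcodes are exactly those for which a zero operand is the identity element. As a sketch, the same check could be factored into a reusable predicate; the helper name `isIdempotentRMW` and its free-standing form are my assumption, not part of this commit:

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper (not in this commit): true when the atomicrmw writes
// back exactly the value it read, i.e. its operand is the identity element
// of its opcode.
static bool isIdempotentRMW(AtomicRMWInst &RMWI) {
  auto *C = dyn_cast<ConstantInt>(RMWI.getValOperand());
  if (!C)
    return false;
  switch (RMWI.getOperation()) {
  case AtomicRMWInst::Add: // x + 0 == x
  case AtomicRMWInst::Sub: // x - 0 == x
  case AtomicRMWInst::Or:  // x | 0 == x
    return C->isZero();
  default:
    return false;
  }
}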
llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -401,6 +401,7 @@ public:
   Instruction *visitFree(CallInst &FI);
   Instruction *visitLoadInst(LoadInst &LI);
   Instruction *visitStoreInst(StoreInst &SI);
+  Instruction *visitAtomicRMWInst(AtomicRMWInst &SI);
   Instruction *visitBranchInst(BranchInst &BI);
   Instruction *visitFenceInst(FenceInst &FI);
   Instruction *visitSwitchInst(SwitchInst &SI);
llvm/test/Transforms/InstCombine/atomicrmw.ll (new file)
@@ -0,0 +1,84 @@
; RUN: opt -instcombine -S -o - %s | FileCheck %s
; Check that we can replace `atomicrmw <op> LHS, 0` with `load atomic LHS`.
; This is possible when:
; - <op> LHS, 0 == LHS
; - the ordering of atomicrmw is compatible with a load (i.e., no release semantics)

; CHECK-LABEL: atomic_add_zero
; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
; CHECK-NEXT: ret i32 %res
define i32 @atomic_add_zero(i32* %addr) {
  %res = atomicrmw add i32* %addr, i32 0 monotonic
  ret i32 %res
}

; Don't transform a volatile atomicrmw. Doing so would otherwise eliminate
; a volatile store.
; CHECK-LABEL: atomic_sub_zero_volatile
; CHECK-NEXT: %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
; CHECK-NEXT: ret i64 %res
define i64 @atomic_sub_zero_volatile(i64* %addr) {
  %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
  ret i64 %res
}


; Check that the transformation properly preserves the syncscope.
; CHECK-LABEL: atomic_or_zero
; CHECK-NEXT: %res = load atomic i16, i16* %addr syncscope("some_syncscope") acquire, align 2
; CHECK-NEXT: ret i16 %res
define i16 @atomic_or_zero(i16* %addr) {
  %res = atomicrmw or i16* %addr, i16 0 syncscope("some_syncscope") acquire
  ret i16 %res
}

; Don't transform seq_cst ordering.
; By eliminating the store part of the atomicrmw, we would lose its
; release semantics, which would be incorrect.
; CHECK-LABEL: atomic_or_zero_seq_cst
; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 seq_cst
; CHECK-NEXT: ret i16 %res
define i16 @atomic_or_zero_seq_cst(i16* %addr) {
  %res = atomicrmw or i16* %addr, i16 0 seq_cst
  ret i16 %res
}

; Check that the transformation does not apply when the value is changed by
; the atomic operation (non-zero constant).
; CHECK-LABEL: atomic_or_non_zero
; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 2 monotonic
; CHECK-NEXT: ret i16 %res
define i16 @atomic_or_non_zero(i16* %addr) {
  %res = atomicrmw or i16* %addr, i16 2 monotonic
  ret i16 %res
}

; Check that the transformation does not apply to opcodes the combine does
; not handle (xor), even though `xor x, 0 == x` also leaves the value unchanged.
; CHECK-LABEL: atomic_xor_zero
; CHECK-NEXT: %res = atomicrmw xor i16* %addr, i16 0 monotonic
; CHECK-NEXT: ret i16 %res
define i16 @atomic_xor_zero(i16* %addr) {
  %res = atomicrmw xor i16* %addr, i16 0 monotonic
  ret i16 %res
}

; Check that the transformation does not apply when the ordering is
; incompatible with a load (release).
; CHECK-LABEL: atomic_or_zero_release
; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 release
; CHECK-NEXT: ret i16 %res
define i16 @atomic_or_zero_release(i16* %addr) {
  %res = atomicrmw or i16* %addr, i16 0 release
  ret i16 %res
}

; Check that the transformation does not apply when the ordering is
; incompatible with a load (acq_rel).
; CHECK-LABEL: atomic_or_zero_acq_rel
; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 acq_rel
; CHECK-NEXT: ret i16 %res
define i16 @atomic_or_zero_acq_rel(i16* %addr) {
  %res = atomicrmw or i16* %addr, i16 0 acq_rel
  ret i16 %res
}
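The release, acq_rel, and seq_cst cases above all fail the ordering check for the same reason: the store half of the RMW carries release semantics that a pure load cannot express. A hedged C++ illustration (mine, not from the commit) of the guarantee that would be lost:

#include <atomic>

std::atomic<int> Ready{0};
std::atomic<int> Turn{0};

// The release RMW on Turn keeps the store to Ready ordered before it.
// If `atomicrmw or %Turn, i32 0 release` were rewritten into a plain atomic
// load, that ordering guarantee would silently disappear.
void hand_off() {
  Ready.store(1, std::memory_order_relaxed);
  Turn.fetch_or(0, std::memory_order_release);
}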