From 97067d3c7321ff8a3dcb0b0f41ae06b1602787c3 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Thu, 14 Feb 2019 18:39:14 +0000
Subject: [PATCH] Teach instcombine about remaining "idempotent" atomicrmw
 types

Expand on Quentin's r353471 patch, which converts some atomicrmws into
loads. Handle the remaining operation types, and fix a slight bug: atomic
loads are required to have alignment. Since this ran within the InstCombine
fixed point, somewhere else in InstCombine was adding the alignment before
the verifier saw it, but we should still fix it here.

Terminology-wise, I'm using the "idempotent" naming that is used for the
same operations in AtomicExpand and X86ISelLoweringInfo. Once this lands,
I'll add similar tests for AtomicExpand and move the pattern-match function
to a common location. In the review there seemed to be consensus that
"idempotent" is slightly incorrect for this context; once we settle on a
better name, I'll update all uses at once.
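Concretely, the transform rewrites an idempotent read-modify-write into a
plain atomic load. A minimal before/after sketch in IR (illustrative only;
@example is a hypothetical function mirroring the updated tests):

  ; Before: or with 0 stores the loaded value back unchanged.
  define i32 @example(i32* %addr) {
    %res = atomicrmw or i32* %addr, i32 0 monotonic
    ret i32 %res
  }

  ; After: a plain atomic load, now with explicit ABI alignment.
  define i32 @example(i32* %addr) {
    %res = load atomic i32, i32* %addr monotonic, align 4
    ret i32 %res
  }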
Differential Revision: https://reviews.llvm.org/D58242

llvm-svn: 354046
---
 .../InstCombine/InstCombineAtomicRMW.cpp      | 88 +++++++++++++------
 llvm/test/Transforms/InstCombine/atomicrmw.ll | 21 ++---
 2 files changed, 68 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index 86bbfb15986c..0c7e7ab66a9a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -14,35 +14,65 @@
 using namespace llvm;
 
-Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
-  switch (RMWI.getOperation()) {
-  default:
-    break;
-  case AtomicRMWInst::Add:
-  case AtomicRMWInst::Sub:
-  case AtomicRMWInst::Or:
-    // Replace atomicrmw <op> addr, 0 => load atomic addr.
+namespace {
+/// Return true if and only if the given instruction does not modify the memory
+/// location referenced.  Note that an idempotent atomicrmw may still have
+/// ordering effects on nearby instructions, or be volatile.
+/// TODO: Common w/ the version in AtomicExpandPass, and change the term used.
+/// "Idempotent" is confusing in this context.
+bool isIdempotentRMW(AtomicRMWInst& RMWI) {
+  auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+  if(!C)
+    // TODO: Handle fadd, fsub?
+    return false;
 
-    // Volatile RMWs perform a load and a store, we cannot replace
-    // this by just a load.
-    if (RMWI.isVolatile())
-      break;
-
-    auto *CI = dyn_cast<ConstantInt>(RMWI.getValOperand());
-    if (!CI || !CI->isZero())
-      break;
-    // Check if the required ordering is compatible with an
-    // atomic load.
-    AtomicOrdering Ordering = RMWI.getOrdering();
-    assert(Ordering != AtomicOrdering::NotAtomic &&
-           Ordering != AtomicOrdering::Unordered &&
-           "AtomicRMWs don't make sense with Unordered or NotAtomic");
-    if (Ordering != AtomicOrdering::Acquire &&
-        Ordering != AtomicOrdering::Monotonic)
-      break;
-    LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
-    Load->setAtomic(Ordering, RMWI.getSyncScopeID());
-    return Load;
+  AtomicRMWInst::BinOp Op = RMWI.getOperation();
+  switch(Op) {
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::Xor:
+    return C->isZero();
+  case AtomicRMWInst::And:
+    return C->isMinusOne();
+  case AtomicRMWInst::Min:
+    return C->isMaxValue(true);
+  case AtomicRMWInst::Max:
+    return C->isMinValue(true);
+  case AtomicRMWInst::UMin:
+    return C->isMaxValue(false);
+  case AtomicRMWInst::UMax:
+    return C->isMinValue(false);
+  default:
+    return false;
   }
-  return nullptr;
+}
+}
+
+
+Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+  if (!isIdempotentRMW(RMWI))
+    return nullptr;
+
+  // TODO: Canonicalize the operation for an idempotent operation we can't
+  // convert into a simple load.
+
+  // Volatile RMWs perform a load and a store, we cannot replace
+  // this by just a load.
+  if (RMWI.isVolatile())
+    return nullptr;
+
+  // Check if the required ordering is compatible with an atomic load.
+  AtomicOrdering Ordering = RMWI.getOrdering();
+  assert(Ordering != AtomicOrdering::NotAtomic &&
+         Ordering != AtomicOrdering::Unordered &&
+         "AtomicRMWs don't make sense with Unordered or NotAtomic");
+  if (Ordering != AtomicOrdering::Acquire &&
+      Ordering != AtomicOrdering::Monotonic)
+    return nullptr;
+
+  LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
+  Load->setAtomic(Ordering, RMWI.getSyncScopeID());
+  Load->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+  return Load;
 }
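To illustrate the ordering guard above (a sketch with hypothetical IR, not
taken from the patch's test file): the combine only fires for monotonic and
acquire RMWs, since those are the only orderings an atomic load can
faithfully provide.

  ; Converted: acquire is compatible with an atomic load.
  %r0 = atomicrmw add i32* %addr, i32 0 acquire
  ; => %r0 = load atomic i32, i32* %addr acquire, align 4

  ; Not converted: seq_cst (like release) carries store-side ordering
  ; that a load alone cannot preserve.
  %r1 = atomicrmw add i32* %addr, i32 0 seq_cst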
diff --git a/llvm/test/Transforms/InstCombine/atomicrmw.ll b/llvm/test/Transforms/InstCombine/atomicrmw.ll
index 71eba4f5459c..0ee747dce2c4 100644
--- a/llvm/test/Transforms/InstCombine/atomicrmw.ll
+++ b/llvm/test/Transforms/InstCombine/atomicrmw.ll
@@ -29,14 +29,14 @@ define i32 @atomic_sub_zero(i32* %addr) {
 }
 
 ; CHECK-LABEL: atomic_and_allones
-; CHECK-NEXT: %res = atomicrmw and i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
 ; CHECK-NEXT: ret i32 %res
 define i32 @atomic_and_allones(i32* %addr) {
   %res = atomicrmw and i32* %addr, i32 -1 monotonic
   ret i32 %res
 }
 
 ; CHECK-LABEL: atomic_umin_uint_max
-; CHECK-NEXT: %res = atomicrmw umin i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
 ; CHECK-NEXT: ret i32 %res
 define i32 @atomic_umin_uint_max(i32* %addr) {
   %res = atomicrmw umin i32* %addr, i32 -1 monotonic
@@ -44,7 +44,7 @@ define i32 @atomic_umin_uint_max(i32* %addr) {
 }
 
 ; CHECK-LABEL: atomic_umax_zero
-; CHECK-NEXT: %res = atomicrmw umax i32* %addr, i32 0 monotonic
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
 ; CHECK-NEXT: ret i32 %res
 define i32 @atomic_umax_zero(i32* %addr) {
   %res = atomicrmw umax i32* %addr, i32 0 monotonic
@@ -52,24 +52,23 @@ define i32 @atomic_umax_zero(i32* %addr) {
 }
 
 ; CHECK-LABEL: atomic_min_smax_char
-; CHECK-NEXT: %res = atomicrmw min i8* %addr, i8 -128 monotonic
+; CHECK-NEXT: %res = load atomic i8, i8* %addr monotonic, align 1
 ; CHECK-NEXT: ret i8 %res
 define i8 @atomic_min_smax_char(i8* %addr) {
-  %res = atomicrmw min i8* %addr, i8 -128 monotonic
+  %res = atomicrmw min i8* %addr, i8 127 monotonic
   ret i8 %res
 }
 
 ; CHECK-LABEL: atomic_max_smin_char
-; CHECK-NEXT: %res = atomicrmw max i8* %addr, i8 127 monotonic
+; CHECK-NEXT: %res = load atomic i8, i8* %addr monotonic, align 1
 ; CHECK-NEXT: ret i8 %res
 define i8 @atomic_max_smin_char(i8* %addr) {
-  %res = atomicrmw max i8* %addr, i8 127 monotonic
+  %res = atomicrmw max i8* %addr, i8 -128 monotonic
   ret i8 %res
 }
 
-; Don't transform volatile atomicrmw. This would eliminate a volatile store
-; otherwise.
+; Can't replace a volatile w/a load; this would eliminate a volatile store.
 ; CHECK-LABEL: atomic_sub_zero_volatile
 ; CHECK-NEXT: %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
 ; CHECK-NEXT: ret i64 %res
@@ -109,10 +108,8 @@ define i16 @atomic_add_non_zero(i16* %addr) {
   ret i16 %res
 }
 
-; Check that the transformation does not apply when the value is changed by
-; the atomic operation (xor operation with zero).
 ; CHECK-LABEL: atomic_xor_zero
-; CHECK-NEXT: %res = atomicrmw xor i16* %addr, i16 0 monotonic
+; CHECK-NEXT: %res = load atomic i16, i16* %addr monotonic, align 2
 ; CHECK-NEXT: ret i16 %res
 define i16 @atomic_xor_zero(i16* %addr) {
   %res = atomicrmw xor i16* %addr, i16 0 monotonic
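For the fadd/fsub TODO in isIdempotentRMW, a possible extension (a sketch,
not part of this patch): x + -0.0 == x for every float x, modulo
signaling-NaN quieting, so an fadd of -0.0 could be treated the same way.

  ; Hypothetical future case: an idempotent floating-point RMW.
  %res = atomicrmw fadd float* %addr, float -0.0 monotonic
  ; could become: %res = load atomic float, float* %addr monotonic, align 4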