Encode alignment attribute for `atomicrmw`

This is a follow-up patch to D83136 adding the align attribute to `atomicrmw`. Differential Revision: https://reviews.llvm.org/D83465
2021-02-11 15:17:37 -05:00 · 2021-02-11 15:17:37 -05:00 · d06ab79816
parent cb41ee92da
commit d06ab79816
7 changed files with 108 additions and 45 deletions
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@ -9701,7 +9701,7 @@ Syntax:

 ::

-      atomicrmw [volatile] <operation> <ty>* <pointer>, <ty> <value> [syncscope("<target-scope>")] <ordering>                   ; yields ty
+      atomicrmw [volatile] <operation> <ty>* <pointer>, <ty> <value> [syncscope("<target-scope>")] <ordering>[, align <alignment>]  ; yields ty

 Overview:
 """""""""
@ -9739,6 +9739,13 @@ the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not
 allowed to modify the number or order of execution of this
 ``atomicrmw`` with other :ref:`volatile operations <volatile>`.

+The instruction can take an optional ``align`` attribute.
+The alignment must be a power of two greater than or equal to the size of the
+``<value>`` type. If unspecified, the alignment is assumed to be equal to the
+size of the ``<value>`` type. Note that this default alignment assumption is
+different from the alignment used for the load/store instructions when
+``align`` isn't specified.
+
 A ``atomicrmw`` instruction can also take an optional
 ":ref:`syncscope <syncscope>`" argument.

@ -9759,10 +9766,8 @@ operation argument:
 -  xor: ``*ptr = *ptr ^ val``
 -  max: ``*ptr = *ptr > val ? *ptr : val`` (using a signed comparison)
 -  min: ``*ptr = *ptr < val ? *ptr : val`` (using a signed comparison)
-  umax: ``*ptr = *ptr > val ? *ptr : val`` (using an unsigned
-   comparison)
-  umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned
-   comparison)
+-  umax: ``*ptr = *ptr > val ? *ptr : val`` (using an unsigned comparison)
+-  umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned comparison)
 - fadd: ``*ptr = *ptr + val`` (using floating point arithmetic)
 - fsub: ``*ptr = *ptr - val`` (using floating point arithmetic)

--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@ -7499,6 +7499,7 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
  bool isVolatile = false;
  bool IsFP = false;
  AtomicRMWInst::BinOp Operation;
+  MaybeAlign Alignment;

  if (EatIfPresent(lltok::kw_volatile))
    isVolatile = true;
@ -7531,7 +7532,8 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
  if (parseTypeAndValue(Ptr, PtrLoc, PFS) ||
      parseToken(lltok::comma, "expected ',' after atomicrmw address") ||
      parseTypeAndValue(Val, ValLoc, PFS) ||
-      parseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering))
+      parseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering) ||
+      parseOptionalCommaAlign(Alignment, AteExtraComma))
    return true;

  if (Ordering == AtomicOrdering::Unordered)
@ -7566,11 +7568,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
  if (Size < 8 || (Size & (Size - 1)))
    return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
                         " integer");
-  Align Alignment(
+  const Align DefaultAlignment(
      PFS.getFunction().getParent()->getDataLayout().getTypeStoreSize(
          Val->getType()));
  AtomicRMWInst *RMWI =
-      new AtomicRMWInst(Operation, Ptr, Val, Alignment, Ordering, SSID);
+      new AtomicRMWInst(Operation, Ptr, Val,
+                        Alignment.getValueOr(DefaultAlignment), Ordering, SSID);
  RMWI->setVolatile(isVolatile);
  Inst = RMWI;
  return AteExtraComma ? InstExtraComma : InstNormal;
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@ -5131,29 +5131,55 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
      break;
    }
    case bitc::FUNC_CODE_INST_ATOMICRMW: {
-      // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid]
+      // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid, align?]
+      const size_t NumRecords = Record.size();
      unsigned OpNum = 0;
-      Value *Ptr, *Val;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy) ||
-          !isa<PointerType>(Ptr->getType()) ||
-          popValue(Record, OpNum, NextValueNo,
-                   getPointerElementFlatType(FullTy), Val) ||
-          OpNum + 4 != Record.size())
+
+      Value *Ptr = nullptr;
+      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy))
        return error("Invalid record");
-      AtomicRMWInst::BinOp Operation = getDecodedRMWOperation(Record[OpNum]);
+
+      if (!isa<PointerType>(Ptr->getType()))
+        return error("Invalid record");
+
+      Value *Val = nullptr;
+      if (popValue(Record, OpNum, NextValueNo,
+                   getPointerElementFlatType(FullTy), Val))
+        return error("Invalid record");
+
+      if (!(NumRecords == (OpNum + 4) || NumRecords == (OpNum + 5)))
+        return error("Invalid record");
+
+      const AtomicRMWInst::BinOp Operation =
+          getDecodedRMWOperation(Record[OpNum]);
      if (Operation < AtomicRMWInst::FIRST_BINOP ||
          Operation > AtomicRMWInst::LAST_BINOP)
        return error("Invalid record");
-      AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
+
+      const bool IsVol = Record[OpNum + 1];
+
+      const AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
      if (Ordering == AtomicOrdering::NotAtomic ||
          Ordering == AtomicOrdering::Unordered)
        return error("Invalid record");
-      SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
-      Align Alignment(
-          TheModule->getDataLayout().getTypeStoreSize(Val->getType()));
-      I = new AtomicRMWInst(Operation, Ptr, Val, Alignment, Ordering, SSID);
+
+      const SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
+
+      MaybeAlign Alignment;
+      // The optional alignment follows ssid, i.e. it lives at OpNum + 4.
+      if (NumRecords == (OpNum + 5)) {
+        if (Error Err = parseAlignmentValue(Record[OpNum + 4], Alignment))
+          return Err;
+      }
+
+      if (!Alignment)
+        Alignment =
+            Align(TheModule->getDataLayout().getTypeStoreSize(Val->getType()));
+
+      I = new AtomicRMWInst(Operation, Ptr, Val, *Alignment, Ordering, SSID);
      FullTy = getPointerElementFlatType(FullTy);
-      cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
+      cast<AtomicRMWInst>(I)->setVolatile(IsVol);
+
      InstructionList.push_back(I);
      break;
    }
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@ -3071,6 +3071,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
    Vals.push_back(getEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
    Vals.push_back(
        getEncodedSyncScopeID(cast<AtomicRMWInst>(I).getSyncScopeID()));
+    Vals.push_back(getEncodedAlign(cast<AtomicRMWInst>(I).getAlign()));
    break;
  case Instruction::Fence:
    Code = bitc::FUNC_CODE_INST_FENCE;
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@ -4327,6 +4327,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
  } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
    writeAtomic(RMWI->getContext(), RMWI->getOrdering(),
                RMWI->getSyncScopeID());
+    Out << ", align " << RMWI->getAlign().value();
  } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) {
    writeAtomic(FI->getContext(), FI->getOrdering(), FI->getSyncScopeID());
  } else if (const ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(&I)) {
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@ -734,28 +734,55 @@ define void @atomics(i32* %word) {
  ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic
  %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic
  ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic
-  %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
-  ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
-  %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
-  ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
-  %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
-  ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
-  %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
-  ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
-  %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
-  ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
-  %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
-  ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
-  %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
-  ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
-  %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
-  ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
-  %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
-  ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
-  %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 syncscope("singlethread") monotonic
-  ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 syncscope("singlethread") monotonic
-  %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 syncscope("singlethread") monotonic
-  ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 syncscope("singlethread") monotonic
+
+  ;; Atomic w/o alignment
+  %atomicrmw_no_align.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+  ; CHECK: %atomicrmw_no_align.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+  %atomicrmw_no_align.add = atomicrmw add i32* %word, i32 13 monotonic
+  ; CHECK: %atomicrmw_no_align.add = atomicrmw add i32* %word, i32 13 monotonic
+  %atomicrmw_no_align.sub = atomicrmw sub i32* %word, i32 14 monotonic
+  ; CHECK: %atomicrmw_no_align.sub = atomicrmw sub i32* %word, i32 14 monotonic
+  %atomicrmw_no_align.and = atomicrmw and i32* %word, i32 15 monotonic
+  ; CHECK: %atomicrmw_no_align.and = atomicrmw and i32* %word, i32 15 monotonic
+  %atomicrmw_no_align.nand = atomicrmw nand i32* %word, i32 16 monotonic
+  ; CHECK: %atomicrmw_no_align.nand = atomicrmw nand i32* %word, i32 16 monotonic
+  %atomicrmw_no_align.or = atomicrmw or i32* %word, i32 17 monotonic
+  ; CHECK: %atomicrmw_no_align.or = atomicrmw or i32* %word, i32 17 monotonic
+  %atomicrmw_no_align.xor = atomicrmw xor i32* %word, i32 18 monotonic
+  ; CHECK: %atomicrmw_no_align.xor = atomicrmw xor i32* %word, i32 18 monotonic
+  %atomicrmw_no_align.max = atomicrmw max i32* %word, i32 19 monotonic
+  ; CHECK: %atomicrmw_no_align.max = atomicrmw max i32* %word, i32 19 monotonic
+  %atomicrmw_no_align.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+  ; CHECK: %atomicrmw_no_align.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+  %atomicrmw_no_align.umax = atomicrmw umax i32* %word, i32 21 syncscope("singlethread") monotonic
+  ; CHECK: %atomicrmw_no_align.umax = atomicrmw umax i32* %word, i32 21 syncscope("singlethread") monotonic
+  %atomicrmw_no_align.umin = atomicrmw volatile umin i32* %word, i32 22 syncscope("singlethread") monotonic
+  ; CHECK: %atomicrmw_no_align.umin = atomicrmw volatile umin i32* %word, i32 22 syncscope("singlethread") monotonic
+
+  ;; Atomic w/ alignment
+  %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic, align 16
+  ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic, align 16
+  %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic, align 16
+  ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic, align 16
+  %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic, align 16
+  ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic, align 16
+  %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic, align 16
+  ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic, align 16
+  %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic, align 16
+  ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic, align 16
+  %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic, align 16
+  ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic, align 16
+  %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic, align 16
+  ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic, align 16
+  %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic, align 16
+  ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic, align 16
+  %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic, align 16
+  ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic, align 16
+  %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 syncscope("singlethread") monotonic, align 16
+  ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 syncscope("singlethread") monotonic, align 16
+  %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 syncscope("singlethread") monotonic, align 16
+  ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 syncscope("singlethread") monotonic, align 16
+  
  fence acquire
  ; CHECK: fence acquire
  fence release
--- a/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll
+++ b/llvm/test/Transforms/GCOVProfiling/atomic-counter.ll
@ -4,7 +4,7 @@

 ; CHECK-LABEL: void @empty()
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %0 = atomicrmw add i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), i64 1 monotonic, !dbg [[DBG:![0-9]+]]
+; CHECK-NEXT:    %0 = atomicrmw add i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), i64 1 monotonic, align 8, !dbg [[DBG:![0-9]+]]
 ; CHECK-NEXT:    ret void, !dbg [[DBG]]

 define dso_local void @empty() !dbg !5 {