Don't treat readnone call in presplit coroutine as not access memory

To solve the readnone problems in coroutines. See https://discourse.llvm.org/t/address-thread-identification-problems-with-coroutine/62015 for details. According to the discussion, we decide to fix the problem by inserting isPresplitCoroutine() checks in different passes instead of wrapping/unwrapping readnone attributes in CoroEarly/CoroCleanup passes. In this direction, we might not be able to cover every case at first. Let's take a "find and fix" strategy. Reviewed By: nikic, nhaehnle, jyknight Differential Revision: https://reviews.llvm.org/D127383
2022-07-20 10:37:23 +08:00 · 2022-07-20 10:37:23 +08:00 · 57224ff4a6
parent 87ce7b41d8
commit 57224ff4a6
8 changed files with 323 additions and 6 deletions
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@ -1751,4 +1751,8 @@ Areas Requiring Attention
 #. Make required changes to make sure that coroutine optimizations work with
   LTO.

+#. A readnone/writeonly call may access memory in a presplit coroutine. Since
+thread-id was assumed to be a constant in a function historically. But it is
+not true for coroutines.
+
 #. More tests, more tests, more tests
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@ -1712,11 +1712,17 @@ example:
    the module.  That is, a function can be both ``inaccessiblememonly`` and
    have a ``noalias`` return which introduces a new, potentially initialized,
    allocation.
+
+    Note that accessing the current thread's identity, e.g. getting the address
+    of a thread-local variable is not considered a memory read.
 ``inaccessiblemem_or_argmemonly``
    This attribute indicates that the function may only access memory that is
    either not accessible by the module being compiled, or is pointed to
    by its pointer arguments. This is a weaker form of  ``argmemonly``. If the
    function reads or writes other memory, the behavior is undefined.
+
+    Note that accessing the current thread's identity, e.g. getting the address
+    of a thread-local variable is not considered a memory read.
 ``inlinehint``
    This attribute indicates that the source code contained a hint that
    inlining this function is desirable (such as the "inline" keyword in
@ -1943,6 +1949,9 @@ example:
    or has other side-effects, the behavior is undefined. If a
    function reads from or writes to a readnone pointer argument, the behavior
    is undefined.
+
+    Note that accessing the current thread's identity, e.g. getting the address
+    of a thread-local variable is not considered a memory read.
 ``readonly``
    On a function, this attribute indicates that the function does not write
    through any pointer arguments (including ``byval`` arguments) or otherwise
@ -1990,6 +1999,9 @@ example:
    If a writeonly function reads memory visible outside the function or has
    other side-effects, the behavior is undefined. If a function reads
    from a writeonly pointer argument, the behavior is undefined.
+
+    Note that accessing the current thread's identity, e.g. getting the address
+    of a thread-local variable is not considered a memory read.
 ``argmemonly``
    This attribute indicates that the only memory accesses inside function are
    loads and stores from objects pointed to by its pointer-typed arguments,
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@ -1848,7 +1848,14 @@ public:
  bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
  void setIsNoInline() { addFnAttr(Attribute::NoInline); }
  /// Determine if the call does not access memory.
-  bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); }
+  bool doesNotAccessMemory() const {
+    return hasFnAttr(Attribute::ReadNone) &&
+           // If the call lives in presplit coroutine, we can't assume the
+           // call won't access memory even if it has readnone attribute.
+           // Since readnone could be used for thread identification and
+           // coroutines might resume in different threads.
+           (!getFunction() || !getFunction()->isPresplitCoroutine());
+  }
  void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); }

  /// Determine if the call does not access or only reads memory.
@ -1860,21 +1867,30 @@ public:

  /// Determine if the call does not access or only writes memory.
  bool onlyWritesMemory() const {
-    return hasImpliedFnAttr(Attribute::WriteOnly);
+    return hasImpliedFnAttr(Attribute::WriteOnly) &&
+           // See the comments in doesNotAccessMemory. Because readnone implies
+           // writeonly.
+           (!getFunction() || !getFunction()->isPresplitCoroutine());
  }
  void setOnlyWritesMemory() { addFnAttr(Attribute::WriteOnly); }

  /// Determine if the call can access memmory only using pointers based
  /// on its arguments.
  bool onlyAccessesArgMemory() const {
-    return hasFnAttr(Attribute::ArgMemOnly);
+    return hasFnAttr(Attribute::ArgMemOnly) &&
+        // Thread ID don't count as inaccessible memory. And thread ID don't
+        // count as constant in presplit coroutine.
+        (!getFunction() || !getFunction()->isPresplitCoroutine());;
  }
  void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); }

  /// Determine if the function may only access memory that is
  /// inaccessible from the IR.
  bool onlyAccessesInaccessibleMemory() const {
-    return hasFnAttr(Attribute::InaccessibleMemOnly);
+    return hasFnAttr(Attribute::InaccessibleMemOnly) &&
+        // Thread ID don't count as inaccessible memory. And thread ID don't
+        // count as constant in presplit coroutine.
+        (!getFunction() || !getFunction()->isPresplitCoroutine());
  }
  void setOnlyAccessesInaccessibleMemory() {
    addFnAttr(Attribute::InaccessibleMemOnly);
@ -1883,7 +1899,10 @@ public:
  /// Determine if the function may only access memory that is
  /// either inaccessible from the IR or pointed to by its arguments.
  bool onlyAccessesInaccessibleMemOrArgMem() const {
-    return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+    return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly) &&
+        // Thread ID don't count as inaccessible memory. And thread ID don't
+        // count as constant in presplit coroutine.
+        (!getFunction() || !getFunction()->isPresplitCoroutine());
  }
  void setOnlyAccessesInaccessibleMemOrArgMem() {
    addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@ -767,7 +767,11 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const CallBase *Call) {
  // If the call has operand bundles then aliasing attributes from the function
  // it calls do not directly apply to the call.  This can be made more precise
  // in the future.
-  if (!Call->hasOperandBundles())
+  //
+  // If the call lives in a presplit coroutine, the readnone, writeonly,
+  // inaccessiblememonly and inaccessiblemem_or_argmemonly attribute from the
+  // function might not directly apply to the call.
+  if (!Call->hasOperandBundles() && !Call->getFunction()->isPresplitCoroutine())
    if (const Function *F = Call->getCalledFunction())
      Min =
          FunctionModRefBehavior(Min & getBestAAResults().getModRefBehavior(F));
--- a/llvm/test/Transforms/Coroutines/coro-readnone-01.ll
+++ b/llvm/test/Transforms/Coroutines/coro-readnone-01.ll
@ -0,0 +1,89 @@
+; Tests that the readnone function which cross suspend points wouldn't be misoptimized.
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK,CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %j = call i32 @readnone_func() readnone
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; Tests that normal functions wouldn't be affected.
+define i1 @normal_function() {
+entry:
+  %i = call i32 @readnone_func() readnone
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  ret i1 true
+
+diff:
+  call void @print_diff()
+  ret i1 false
+}
+
+; CHECK_SPLITTED-LABEL: normal_function(
+; CHECK_SPLITTED-NEXT: entry
+; CHECK_SPLITTED-NEXT:   call i32 @readnone_func()
+; CHECK_SPLITTED-NEXT:   call void @print_same()
+; CHECK_SPLITTED-NEXT:   ret i1 true
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK: br i1 %cmp, label %same, label %diff
+; CHECK-EMPTY:
+; CHECK-NEXT: same:
+; CHECK-NEXT:   call void @print_same()
+; CHECK-NEXT:   br label
+; CHECK-EMPTY:
+; CHECK-NEXT: diff:
+; CHECK-NEXT:   call void @print_diff()
+; CHECK-NEXT:   br label
+
+declare i32 @readnone_func() readnone
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
--- a/llvm/test/Transforms/Coroutines/coro-readnone-02.ll
+++ b/llvm/test/Transforms/Coroutines/coro-readnone-02.ll
@ -0,0 +1,81 @@
+; Tests that the readnone function which don't cross suspend points could be optimized expectly after split.
+;
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,early-cse,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,gvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,newgvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  ; noop call to break optimization to combine two consecutive readonly calls.
+  call void @nop()
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_SPLITTED-NEXT:  :
+; CHECK_SPLITTED-NEXT:    call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]]
+; CHECK_SPLITTED-NEXT:    call void @nop()
+; CHECK_SPLITTED-NEXT:    call void @print_same()
+;
+; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone }
+;
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: same:
+; CHECK_UNSPLITTED-NEXT:   call void @print_same()
+; CHECK_UNSPLITTED-NEXT:   br label
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: diff:
+; CHECK_UNSPLITTED-NEXT:   call void @print_diff()
+; CHECK_UNSPLITTED-NEXT:   br label
+
+declare i32 @readnone_func() readnone
+declare void @nop()
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
--- a/llvm/unittests/Analysis/AliasAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/AliasAnalysisTest.cpp
@ -366,6 +366,60 @@ TEST_F(AliasAnalysisTest, PartialAliasOffsetSign) {
  EXPECT_EQ(AR, AliasResult::PartialAlias);
  EXPECT_EQ(-1, AR.getOffset());
 }
+
+TEST_F(AliasAnalysisTest, AAInCoroutines) {
+  LLVMContext C;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M = parseAssemblyString(R"(
+      define void @f() presplitcoroutine  {
+      entry:
+        %ReadNoneCall = call i32 @readnone_func() readnone
+        %WriteOnlyCall = call i32 @writeonly_func() writeonly
+        %ArgMemOnlyCall = call i32 @argmemonly_func() argmemonly
+        %OnlyAccessesInaccessibleMemoryCall = call i32 @only_accesses_inaccessible_memory_call() inaccessiblememonly
+        %OnlyAccessesInaccessibleMemOrArgMemCall = call i32 @only_accesses_inaccessible_memory_or_argmemonly_call() inaccessiblemem_or_argmemonly
+        ret void
+      }
+
+      declare i32 @readnone_func() readnone
+      declare i32 @writeonly_func() writeonly
+      declare i32 @argmemonly_func() argmemonly
+      declare i32 @only_accesses_inaccessible_memory_call() inaccessiblememonly
+      declare i32 @only_accesses_inaccessible_memory_or_argmemonly_call() inaccessiblemem_or_argmemonly
+    )",
+                                                  Err, C);
+
+  ASSERT_TRUE(M);
+  Function *F = M->getFunction("f");
+  CallInst *ReadNoneCall =
+      cast<CallInst>(getInstructionByName(*F, "ReadNoneCall"));
+
+  auto &AA = getAAResults(*F);
+  EXPECT_FALSE(AA.doesNotAccessMemory(ReadNoneCall));
+  EXPECT_TRUE(AA.onlyReadsMemory(ReadNoneCall));
+
+  EXPECT_EQ(FMRB_OnlyReadsMemory, AA.getModRefBehavior(ReadNoneCall));
+
+  CallInst *WriteOnlyCall =
+      cast<CallInst>(getInstructionByName(*F, "WriteOnlyCall"));
+  EXPECT_EQ(FMRB_UnknownModRefBehavior, AA.getModRefBehavior(WriteOnlyCall));
+
+  CallInst *ArgMemOnlyCall =
+      cast<CallInst>(getInstructionByName(*F, "ArgMemOnlyCall"));
+  EXPECT_EQ(FMRB_UnknownModRefBehavior,
+            AA.getModRefBehavior(ArgMemOnlyCall));
+
+  CallInst *OnlyAccessesInaccessibleMemoryCall =
+      cast<CallInst>(getInstructionByName(*F, "OnlyAccessesInaccessibleMemoryCall"));
+  EXPECT_EQ(FMRB_UnknownModRefBehavior,
+            AA.getModRefBehavior(OnlyAccessesInaccessibleMemoryCall));
+
+  CallInst *OnlyAccessesInaccessibleMemOrArgMemCall =
+      cast<CallInst>(getInstructionByName(*F, "OnlyAccessesInaccessibleMemOrArgMemCall"));
+  EXPECT_EQ(FMRB_UnknownModRefBehavior,
+            AA.getModRefBehavior(OnlyAccessesInaccessibleMemOrArgMemCall));
+}
+
 class AAPassInfraTest : public testing::Test {
 protected:
  LLVMContext C;
--- a/llvm/unittests/IR/InstructionsTest.cpp
+++ b/llvm/unittests/IR/InstructionsTest.cpp
@ -20,6 +20,7 @@
 #include "llvm/IR/FPEnv.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
@ -1681,5 +1682,58 @@ TEST(InstructionsTest, AllocaInst) {
  EXPECT_EQ(H.getAllocationSizeInBits(DL), TypeSize::getFixed(160));
 }

+static Instruction *getInstructionByName(Function &F, StringRef Name) {
+  for (auto &I : instructions(F))
+    if (I.getName() == Name)
+      return &I;
+  llvm_unreachable("Expected to find instruction!");
+}
+
+TEST(InstructionsTest, CallInstInPresplitCoroutine) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M = parseIR(Ctx, R"(
+      define void @f() presplitcoroutine  {
+      entry:
+        %ReadNoneCall = call i32 @readnone_func() readnone
+        %WriteOnlyCall = call i32 @writeonly_func() writeonly
+        %ArgMemOnlyCall = call i32 @argmemonly_func() argmemonly
+        %OnlyAccessesInaccessibleMemoryCall = call i32 @only_accesses_inaccessible_memory_call() inaccessiblememonly
+        %OnlyAccessesInaccessibleMemOrArgMemCall = call i32 @only_accesses_inaccessible_memory_or_argmemonly_call() inaccessiblemem_or_argmemonly
+        ret void
+      }
+
+      declare i32 @readnone_func() readnone
+      declare i32 @writeonly_func() writeonly
+      declare i32 @argmemonly_func() argmemonly
+      declare i32 @only_accesses_inaccessible_memory_call() inaccessiblememonly
+      declare i32 @only_accesses_inaccessible_memory_or_argmemonly_call() inaccessiblemem_or_argmemonly
+    )");
+
+  ASSERT_TRUE(M);
+  Function *F = M->getFunction("f");
+  CallInst *ReadNoneCall =
+      cast<CallInst>(getInstructionByName(*F, "ReadNoneCall"));
+  CallInst *WriteOnlyCall =
+      cast<CallInst>(getInstructionByName(*F, "WriteOnlyCall"));
+  CallInst *OnlyAccessesInaccessibleMemoryCall =
+      cast<CallInst>(getInstructionByName(*F, "OnlyAccessesInaccessibleMemoryCall"));
+  CallInst *OnlyAccessesInaccessibleMemOrArgMemCall =
+      cast<CallInst>(getInstructionByName(*F, "OnlyAccessesInaccessibleMemOrArgMemCall"));
+   CallInst *ArgMemOnlyCall =
+      cast<CallInst>(getInstructionByName(*F, "ArgMemOnlyCall"));
+
+  EXPECT_FALSE(ReadNoneCall->doesNotAccessMemory());
+  EXPECT_FALSE(ReadNoneCall->onlyWritesMemory());
+  EXPECT_TRUE(ReadNoneCall->onlyReadsMemory());
+
+  EXPECT_FALSE(WriteOnlyCall->onlyWritesMemory());
+
+  EXPECT_FALSE(OnlyAccessesInaccessibleMemoryCall->onlyAccessesInaccessibleMemory());
+
+  EXPECT_FALSE(OnlyAccessesInaccessibleMemOrArgMemCall->onlyAccessesInaccessibleMemOrArgMem());
+
+  EXPECT_FALSE(ArgMemOnlyCall->onlyAccessesArgMemory());
+}
+
 } // end anonymous namespace
 } // end namespace llvm