From ea0e2ca1acb20781515c23850ec1ee7476909b2f Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 29 Mar 2021 17:02:41 -0700 Subject: [PATCH] [SROA] Allow SROA on pointers with invariant group intrinsic uses When we are able to SROA an alloca, we know all uses of it, meaning we don't have to preserve the invariant group intrinsics and metadata. It's possible that we could lose information regarding redundant loads/stores, but that's unlikely to have any real impact since right now the only user is Clang and vtables. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D99760 --- llvm/include/llvm/IR/Instruction.h | 4 + llvm/lib/IR/Instruction.cpp | 14 ++- llvm/lib/Transforms/Scalar/SROA.cpp | 8 +- llvm/test/Transforms/SROA/invariant-group.ll | 90 ++++++++++++++++++++ 4 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/SROA/invariant-group.ll diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index bf73d4155b8a..3e6402994bab 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -657,6 +657,10 @@ public: /// llvm.lifetime.end marker. bool isLifetimeStartOrEnd() const; + /// Return true if the instruction is a llvm.launder.invariant.group or + /// llvm.strip.invariant.group. + bool isLaunderOrStripInvariantGroup() const; + /// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst. 
bool isDebugOrPseudoInst() const; diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 87a69a8ed232..8e0678f1cf58 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -11,10 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Instruction.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/ADT/DenseSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -674,13 +675,22 @@ bool Instruction::willReturn() const { } bool Instruction::isLifetimeStartOrEnd() const { - auto II = dyn_cast<IntrinsicInst>(this); + auto *II = dyn_cast<IntrinsicInst>(this); if (!II) return false; Intrinsic::ID ID = II->getIntrinsicID(); return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end; } +bool Instruction::isLaunderOrStripInvariantGroup() const { + auto *II = dyn_cast<IntrinsicInst>(this); + if (!II) + return false; + Intrinsic::ID ID = II->getIntrinsicID(); + return ID == Intrinsic::launder_invariant_group || + ID == Intrinsic::strip_invariant_group; +} + bool Instruction::isDebugOrPseudoInst() const { return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this); } diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index b36458012aa9..66a29782039b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -926,7 +926,8 @@ private: "Map index doesn't point back to a slice with this user."); } - // Disable SRoA for any intrinsics except for lifetime invariants. + // Disable SRoA for any intrinsics except for lifetime invariants and + // invariant group. // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. 
void visitIntrinsicInst(IntrinsicInst &II) { @@ -946,6 +947,11 @@ private: return; } + if (II.isLaunderOrStripInvariantGroup()) { + enqueueUsers(II); + return; + } + Base::visitIntrinsicInst(II); } diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll new file mode 100644 index 000000000000..a01e91d79668 --- /dev/null +++ b/llvm/test/Transforms/SROA/invariant-group.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=sroa -S -o - < %s | FileCheck %s + +%t = type { i32, i32 } + +declare i8* @llvm.launder.invariant.group.p0i8(i8* %a) +declare i8* @llvm.strip.invariant.group.p0i8(i8* %a) +declare void @h(i32 %a) +declare i32 @somevalue() + +define void @f() { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[SV1:%.*]] = call i32 @somevalue() +; CHECK-NEXT: [[SV2:%.*]] = call i32 @somevalue() +; CHECK-NEXT: call void @h(i32 [[SV1]]) +; CHECK-NEXT: call void @h(i32 [[SV2]]) +; CHECK-NEXT: ret void +; + %a = alloca %t + + %a1 = getelementptr inbounds %t, %t* %a, i32 0, i32 0 + %a1_i8 = bitcast i32* %a1 to i8* + %a1_i8_inv = call i8* @llvm.launder.invariant.group.p0i8(i8* %a1_i8) + %a1_inv = bitcast i8* %a1_i8_inv to i32* + %a2 = getelementptr inbounds %t, %t* %a, i32 0, i32 1 + + %sv1 = call i32 @somevalue() + %sv2 = call i32 @somevalue() + + store i32 %sv1, i32* %a1_inv, !invariant.group !0 + store i32 %sv2, i32* %a2 + + %v1 = load i32, i32* %a1_inv, !invariant.group !0 + %v2 = load i32, i32* %a2 + + call void @h(i32 %v1) + call void @h(i32 %v2) + + ret void +} + +define void @g() { +; CHECK-LABEL: @g( +; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 +; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[T]], %t* [[A]], i32 0, i32 0 +; CHECK-NEXT: [[A1_I8:%.*]] = bitcast i32* [[A1]] to i8* +; CHECK-NEXT: [[A1_I8_INV:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[A1_I8]]) +; CHECK-NEXT: [[A1_INV:%.*]] = bitcast i8* [[A1_I8_INV]] to i32* +; CHECK-NEXT: 
[[A2:%.*]] = getelementptr inbounds [[T]], %t* [[A]], i32 0, i32 1 +; CHECK-NEXT: [[SV1:%.*]] = call i32 @somevalue() +; CHECK-NEXT: [[SV2:%.*]] = call i32 @somevalue() +; CHECK-NEXT: store i32 [[SV1]], i32* [[A1_INV]], align 4, !invariant.group !0 +; CHECK-NEXT: store i32 [[SV2]], i32* [[A2]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[A1_INV]], align 4, !invariant.group !0 +; CHECK-NEXT: [[V2:%.*]] = load i32, i32* [[A2]], align 4 +; CHECK-NEXT: call void @h(i32 [[V1]]) +; CHECK-NEXT: call void @h(i32 [[V2]]) +; CHECK-NEXT: [[A1_STRIPPED:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* [[A1_I8]]) +; CHECK-NEXT: [[A1_INT:%.*]] = ptrtoint i8* [[A1_STRIPPED]] to i32 +; CHECK-NEXT: call void @h(i32 [[A1_INT]]) +; CHECK-NEXT: ret void +; + %a = alloca %t + + %a1 = getelementptr inbounds %t, %t* %a, i32 0, i32 0 + %a1_i8 = bitcast i32* %a1 to i8* + %a1_i8_inv = call i8* @llvm.launder.invariant.group.p0i8(i8* %a1_i8) + %a1_inv = bitcast i8* %a1_i8_inv to i32* + %a2 = getelementptr inbounds %t, %t* %a, i32 0, i32 1 + + %sv1 = call i32 @somevalue() + %sv2 = call i32 @somevalue() + + store i32 %sv1, i32* %a1_inv, !invariant.group !0 + store i32 %sv2, i32* %a2 + + %v1 = load i32, i32* %a1_inv, !invariant.group !0 + %v2 = load i32, i32* %a2 + + call void @h(i32 %v1) + call void @h(i32 %v2) + + %a1_stripped = call i8* @llvm.strip.invariant.group.p0i8(i8* %a1_i8) + %a1_int = ptrtoint i8* %a1_stripped to i32 + call void @h(i32 %a1_int) + + ret void +} + +!0 = !{}