From 7dd8dbf48652dfddd08cb7a6de0f2b079be45ea8 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 22 Apr 2016 21:18:02 +0000 Subject: [PATCH] Introduce llvm.load.relative intrinsic. This intrinsic takes two arguments, ``%ptr`` and ``%offset``. It loads a 32-bit value from the address ``%ptr + %offset``, adds ``%ptr`` to that value and returns it. The constant folder specifically recognizes the form of this intrinsic and the constant initializers it may load from; if a loaded constant initializer is known to have the form ``i32 trunc(x - %ptr)``, the intrinsic call is folded to ``x``. LLVM provides that the calculation of such a constant initializer will not overflow at link time under the medium code model if ``x`` is an ``unnamed_addr`` function. However, it does not provide this guarantee for a constant initializer folded into a function body. This intrinsic can be used to avoid the possibility of overflows when loading from such a constant. Differential Revision: http://reviews.llvm.org/D18367 llvm-svn: 267223 --- llvm/docs/LangRef.rst | 25 ++++++ llvm/include/llvm/CodeGen/Passes.h | 5 ++ llvm/include/llvm/IR/Intrinsics.td | 3 + llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/Analysis/InlineCost.cpp | 5 ++ llvm/lib/Analysis/InstructionSimplify.cpp | 61 +++++++++++++ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGen.cpp | 1 + llvm/lib/CodeGen/LLVMTargetMachine.cpp | 2 + llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 85 +++++++++++++++++++ llvm/test/CodeGen/Generic/stop-after.ll | 2 +- .../InstSimplify/load-relative-32.ll | 19 +++++ .../Transforms/InstSimplify/load-relative.ll | 75 ++++++++++++++++ .../PreISelIntrinsicLowering/load-relative.ll | 26 ++++++ llvm/tools/opt/opt.cpp | 1 + 15 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp create mode 100644 llvm/test/Transforms/InstSimplify/load-relative-32.ll create mode 100644 
llvm/test/Transforms/InstSimplify/load-relative.ll create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/load-relative.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2cb71bb78ce0..d588b0514b08 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12361,6 +12361,31 @@ if"); and this allows for "check widening" type optimizations. ``@llvm.experimental.guard`` cannot be invoked. +'``llvm.load.relative``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.load.relative.iN(i8* %ptr, iN %offset) argmemonly nounwind readonly + +Overview: +""""""""" + +This intrinsic loads a 32-bit value from the address ``%ptr + %offset``, +adds ``%ptr`` to that value and returns it. The constant folder specifically +recognizes the form of this intrinsic and the constant initializers it may +load from; if a loaded constant initializer is known to have the form +``i32 trunc(x - %ptr)``, the intrinsic call is folded to ``x``. + +LLVM provides that the calculation of such a constant initializer will +not overflow at link time under the medium code model if ``x`` is an +``unnamed_addr`` function. However, it does not provide this guarantee for +a constant initializer folded into a function body. This intrinsic can be +used to avoid the possibility of overflows when loading from such a constant. + Stack Map Intrinsics -------------------- diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 30630dcf6449..589c0500a41c 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -680,6 +680,11 @@ namespace llvm { /// ModulePass *createLowerEmuTLSPass(const TargetMachine *TM); + /// This pass lowers the @llvm.load.relative intrinsic to instructions. + /// This is unsafe to do earlier because a pass may combine the constant + /// initializer into the load, which may result in an overflowing evaluation. 
+ ModulePass *createPreISelIntrinsicLoweringPass(); + /// GlobalMerge - This pass merges internal (by default) globals into structs /// to enable reuse of a base pointer by indexed addressing modes. /// It can also be configured to focus on size optimizations only. diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e49872043fd1..f245df4ff911 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -669,6 +669,9 @@ def int_masked_scatter: Intrinsic<[], def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], [IntrNoMem]>; +def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], + [IntrReadMem, IntrArgMemOnly]>; + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 638071fb11db..557f927a6568 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -246,6 +246,7 @@ void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializePostRAHazardRecognizerPass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); +void initializePreISelIntrinsicLoweringPass(PassRegistry&); void initializePrintFunctionPassWrapperPass(PassRegistry&); void initializePrintModulePassWrapperPass(PassRegistry&); void initializePrintBasicBlockPassPass(PassRegistry&); diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index aa46b68d8c33..ee36c6e5486d 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -900,6 +900,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { default: return Base::visitCallSite(CS); + case Intrinsic::load_relative: + // This is normally 
lowered to 4 LLVM instructions. + Cost += 3 * InlineConstants::InstrCost; + return false; + case Intrinsic::memset: case Intrinsic::memcpy: case Intrinsic::memmove: diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index b34aede2d3c3..b9e6bdbf2e54 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3825,6 +3825,62 @@ static bool IsIdempotent(Intrinsic::ID ID) { } } +static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset, + const DataLayout &DL) { + GlobalValue *PtrSym; + APInt PtrOffset; + if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL)) + return nullptr; + + Type *Int8PtrTy = Type::getInt8PtrTy(Ptr->getContext()); + Type *Int32Ty = Type::getInt32Ty(Ptr->getContext()); + Type *Int32PtrTy = Int32Ty->getPointerTo(); + Type *Int64Ty = Type::getInt64Ty(Ptr->getContext()); + + auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset); + if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64) + return nullptr; + + uint64_t OffsetInt = OffsetConstInt->getSExtValue(); + if (OffsetInt % 4 != 0) + return nullptr; + + Constant *C = ConstantExpr::getGetElementPtr( + Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy), + ConstantInt::get(Int64Ty, OffsetInt / 4)); + Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL); + if (!Loaded) + return nullptr; + + auto *LoadedCE = dyn_cast<ConstantExpr>(Loaded); + if (!LoadedCE) + return nullptr; + + if (LoadedCE->getOpcode() == Instruction::Trunc) { + LoadedCE = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0)); + if (!LoadedCE) + return nullptr; + } + + if (LoadedCE->getOpcode() != Instruction::Sub) + return nullptr; + + auto *LoadedLHS = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0)); + if (!LoadedLHS || LoadedLHS->getOpcode() != Instruction::PtrToInt) + return nullptr; + auto *LoadedLHSPtr = LoadedLHS->getOperand(0); + + Constant *LoadedRHS = LoadedCE->getOperand(1); + GlobalValue *LoadedRHSSym; + APInt LoadedRHSOffset; + if 
(!IsConstantOffsetFromGlobal(LoadedRHS, LoadedRHSSym, LoadedRHSOffset, + DL) || + PtrSym != LoadedRHSSym || PtrOffset != LoadedRHSOffset) + return nullptr; + + return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy); +} + template <typename IterTy> static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, const Query &Q, unsigned MaxRecurse) { @@ -3865,6 +3921,11 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, if (match(RHS, m_Undef())) return Constant::getNullValue(ReturnType); } + + if (IID == Intrinsic::load_relative && isa<Constant>(LHS) && + isa<Constant>(RHS)) + return SimplifyRelativeLoad(cast<Constant>(LHS), cast<Constant>(RHS), + Q.DL); } // Perform idempotent optimizations diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index aab4088eb9ac..0717c3beefee 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -87,6 +87,7 @@ add_llvm_library(LLVMCodeGen PHIEliminationUtils.cpp PostRAHazardRecognizer.cpp PostRASchedulerList.cpp + PreISelIntrinsicLowering.cpp ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index ca3b36c17f1b..7ab69d7c326d 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -63,6 +63,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostMachineSchedulerPass(Registry); initializePostRAHazardRecognizerPass(Registry); initializePostRASchedulerPass(Registry); + initializePreISelIntrinsicLoweringPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRegisterCoalescerPass(Registry); initializeShrinkWrapPass(Registry); diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index a19049360c0f..ea113c894cfb 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -102,6 +102,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, if 
(TM->Options.EmulatedTLS) PM.add(createLowerEmuTLSPass(TM)); + PM.add(createPreISelIntrinsicLoweringPass()); + // Add internal analysis passes from the target machine. PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp new file mode 100644 index 000000000000..a1ff231412d8 --- /dev/null +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -0,0 +1,85 @@ +//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR lowering for the llvm.load.relative intrinsic. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +bool lowerLoadRelative(Function &F) { + if (F.use_empty()) + return false; + + bool Changed = false; + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Type *Int32PtrTy = Int32Ty->getPointerTo(); + Type *Int8Ty = Type::getInt8Ty(F.getContext()); + + for (auto I = F.use_begin(), E = F.use_end(); I != E;) { + auto CI = dyn_cast<CallInst>(I->getUser()); + ++I; + if (!CI || CI->getCalledValue() != &F) + continue; + + IRBuilder<> B(CI); + Value *OffsetPtr = + B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1)); + Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy); + Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4); + + Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32); + + CI->replaceAllUsesWith(ResultPtr); + 
CI->eraseFromParent(); + Changed = true; + } + + return Changed; +} + +bool lowerIntrinsics(Module &M) { + bool Changed = false; + for (Function &F : M) { + if (F.getName().startswith("llvm.load.relative.")) + Changed |= lowerLoadRelative(F); + } + return Changed; +} + +class PreISelIntrinsicLowering : public ModulePass { +public: + static char ID; + PreISelIntrinsicLowering() : ModulePass(ID) {} + + bool runOnModule(Module &M) { + return lowerIntrinsics(M); + } +}; + +char PreISelIntrinsicLowering::ID; + +} + +INITIALIZE_PASS(PreISelIntrinsicLowering, "pre-isel-intrinsic-lowering", + "Pre-ISel Intrinsic Lowering", false, false) + +ModulePass *llvm::createPreISelIntrinsicLoweringPass() { + return new PreISelIntrinsicLowering; +} diff --git a/llvm/test/CodeGen/Generic/stop-after.ll b/llvm/test/CodeGen/Generic/stop-after.ll index 791378c3737d..07a60f7381ab 100644 --- a/llvm/test/CodeGen/Generic/stop-after.ll +++ b/llvm/test/CodeGen/Generic/stop-after.ll @@ -6,6 +6,6 @@ ; STOP-NEXT: Machine Function Analysis ; STOP-NEXT: MIR Printing Pass -; START: -machine-branch-prob -gc-lowering +; START: -machine-branch-prob -pre-isel-intrinsic-lowering ; START: FunctionPass Manager ; START-NEXT: Lower Garbage Collection Instructions diff --git a/llvm/test/Transforms/InstSimplify/load-relative-32.ll b/llvm/test/Transforms/InstSimplify/load-relative-32.ll new file mode 100644 index 000000000000..a38de8549dbd --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/load-relative-32.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -instsimplify -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-unknown-linux-gnu" + +@a = external global i8 + +@c1 = constant [3 x i32] [i32 0, i32 0, +i32 sub (i32 ptrtoint (i8* @a to i32), i32 ptrtoint (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i32)) +] + +; CHECK: @f1 +define i8* @f1() { + ; CHECK: ret 
i8* @a + %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i8*), i32 0) + ret i8* %l +} + +declare i8* @llvm.load.relative.i32(i8*, i32) diff --git a/llvm/test/Transforms/InstSimplify/load-relative.ll b/llvm/test/Transforms/InstSimplify/load-relative.ll new file mode 100644 index 000000000000..3074ede2b697 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/load-relative.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -instsimplify -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = external global i8 +@b = external global i8 + +@c1 = constant i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* @c1 to i64)) to i32) +@c2 = constant [7 x i32] [i32 0, i32 0, +i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32), +i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32), +i32 trunc (i64 add (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32), +i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 1) to i32), +i32 trunc (i64 sub (i64 0, i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32) +] + +; CHECK: @f1 +define i8* @f1() { + ; CHECK: ret i8* @a + %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* @c1 to i8*), i32 0) + ret i8* %l +} + +; CHECK: @f2 +define i8* @f2() { + ; CHECK: ret i8* @a + %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 0) + ret i8* %l +} + +; CHECK: @f3 +define i8* @f3() { + ; CHECK: ret i8* @b + %l = call i8* @llvm.load.relative.i64(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i64 4) + ret i8* %l +} 
+ +; CHECK: @f4 +define i8* @f4() { + ; CHECK: ret i8* % + %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 1) + ret i8* %l +} + +; CHECK: @f5 +define i8* @f5() { + ; CHECK: ret i8* % + %l = call i8* @llvm.load.relative.i32(i8* zeroinitializer, i32 0) + ret i8* %l +} + +; CHECK: @f6 +define i8* @f6() { + ; CHECK: ret i8* % + %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 8) + ret i8* %l +} + +; CHECK: @f7 +define i8* @f7() { + ; CHECK: ret i8* % + %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 12) + ret i8* %l +} + +; CHECK: @f8 +define i8* @f8() { + ; CHECK: ret i8* % + %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 16) + ret i8* %l +} + +declare i8* @llvm.load.relative.i32(i8*, i32) +declare i8* @llvm.load.relative.i64(i8*, i64) diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/load-relative.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/load-relative.ll new file mode 100644 index 000000000000..56bedd9b101f --- /dev/null +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/load-relative.ll @@ -0,0 +1,26 @@ +; RUN: opt -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s + +; CHECK: define i8* @foo32(i8* [[P:%.*]], i32 [[O:%.*]]) +define i8* @foo32(i8* %p, i32 %o) { + ; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i32 [[O]] + ; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32* + ; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4 + ; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]] + ; CHECK: ret i8* [[R]] + %l = call i8* @llvm.load.relative.i32(i8* %p, i32 %o) + ret i8* %l +} + +; CHECK: define i8* @foo64(i8* [[P:%.*]], i64 [[O:%.*]]) +define i8* @foo64(i8* %p, i64 %o) { + ; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], 
i64 [[O]] + ; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32* + ; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4 + ; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]] + ; CHECK: ret i8* [[R]] + %l = call i8* @llvm.load.relative.i64(i8* %p, i64 %o) + ret i8* %l +} + +declare i8* @llvm.load.relative.i32(i8*, i32) +declare i8* @llvm.load.relative.i64(i8*, i64) diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 641735c29989..91993bfaa328 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -353,6 +353,7 @@ int main(int argc, char **argv) { initializeDwarfEHPreparePass(Registry); initializeSafeStackPass(Registry); initializeSjLjEHPreparePass(Registry); + initializePreISelIntrinsicLoweringPass(Registry); #ifdef LINK_POLLY_INTO_TOOLS polly::initializePollyPasses(Registry);