forked from OSchip/llvm-project
Introduce llvm.load.relative intrinsic.
This intrinsic takes two arguments, ``%ptr`` and ``%offset``. It loads a 32-bit value from the address ``%ptr + %offset``, adds ``%ptr`` to that value and returns it. The constant folder specifically recognizes the form of this intrinsic and the constant initializers it may load from; if a loaded constant initializer is known to have the form ``i32 trunc(x - %ptr)``, the intrinsic call is folded to ``x``. LLVM provides that the calculation of such a constant initializer will not overflow at link time under the medium code model if ``x`` is an ``unnamed_addr`` function. However, it does not provide this guarantee for a constant initializer folded into a function body. This intrinsic can be used to avoid the possibility of overflows when loading from such a constant. Differential Revision: http://reviews.llvm.org/D18367 llvm-svn: 267223
This commit is contained in:
parent
b29465fe47
commit
7dd8dbf486
|
@ -12361,6 +12361,31 @@ if"); and this allows for "check widening" type optimizations.
|
|||
``@llvm.experimental.guard`` cannot be invoked.
|
||||
|
||||
|
||||
'``llvm.load.relative``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
declare i8* @llvm.load.relative.iN(i8* %ptr, iN %offset) argmemonly nounwind readonly
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
This intrinsic loads a 32-bit value from the address ``%ptr + %offset``,
|
||||
adds ``%ptr`` to that value and returns it. The constant folder specifically
|
||||
recognizes the form of this intrinsic and the constant initializers it may
|
||||
load from; if a loaded constant initializer is known to have the form
|
||||
``i32 trunc(x - %ptr)``, the intrinsic call is folded to ``x``.
|
||||
|
||||
LLVM provides that the calculation of such a constant initializer will
|
||||
not overflow at link time under the medium code model if ``x`` is an
|
||||
``unnamed_addr`` function. However, it does not provide this guarantee for
|
||||
a constant initializer folded into a function body. This intrinsic can be
|
||||
used to avoid the possibility of overflows when loading from such a constant.
|
||||
|
||||
Stack Map Intrinsics
|
||||
--------------------
|
||||
|
||||
|
|
|
@ -680,6 +680,11 @@ namespace llvm {
|
|||
///
|
||||
ModulePass *createLowerEmuTLSPass(const TargetMachine *TM);
|
||||
|
||||
/// This pass lowers the @llvm.load.relative intrinsic to instructions.
|
||||
/// This is unsafe to do earlier because a pass may combine the constant
|
||||
/// initializer into the load, which may result in an overflowing evaluation.
|
||||
ModulePass *createPreISelIntrinsicLoweringPass();
|
||||
|
||||
/// GlobalMerge - This pass merges internal (by default) globals into structs
|
||||
/// to enable reuse of a base pointer by indexed addressing modes.
|
||||
/// It can also be configured to focus on size optimizations only.
|
||||
|
|
|
@ -669,6 +669,9 @@ def int_masked_scatter: Intrinsic<[],
|
|||
def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
|
||||
[IntrReadMem, IntrArgMemOnly]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Target-specific intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -246,6 +246,7 @@ void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
|
|||
void initializePostRAHazardRecognizerPass(PassRegistry&);
|
||||
void initializePostRASchedulerPass(PassRegistry&);
|
||||
void initializePostMachineSchedulerPass(PassRegistry&);
|
||||
void initializePreISelIntrinsicLoweringPass(PassRegistry&);
|
||||
void initializePrintFunctionPassWrapperPass(PassRegistry&);
|
||||
void initializePrintModulePassWrapperPass(PassRegistry&);
|
||||
void initializePrintBasicBlockPassPass(PassRegistry&);
|
||||
|
|
|
@ -900,6 +900,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
|
|||
default:
|
||||
return Base::visitCallSite(CS);
|
||||
|
||||
case Intrinsic::load_relative:
|
||||
// This is normally lowered to 4 LLVM instructions.
|
||||
Cost += 3 * InlineConstants::InstrCost;
|
||||
return false;
|
||||
|
||||
case Intrinsic::memset:
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memmove:
|
||||
|
|
|
@ -3825,6 +3825,62 @@ static bool IsIdempotent(Intrinsic::ID ID) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Try to fold a call to llvm.load.relative whose operands are both constants.
///
/// The fold succeeds only when all of the following hold:
///  - \p Ptr is a constant offset from a global;
///  - \p Offset is a ConstantInt no wider than 64 bits whose sign-extended
///    value is a multiple of 4;
///  - the i32 constant loaded from \p Ptr + \p Offset is a constant expression
///    `sub (ptrtoint X), RHS`, optionally wrapped in a trunc, where RHS
///    resolves to the same global and offset as \p Ptr.
///
/// \returns X bitcast to i8* on success, nullptr otherwise.
static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
                                   const DataLayout &DL) {
  GlobalValue *PtrSym;
  APInt PtrOffset;
  if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL))
    return nullptr;

  Type *Int8PtrTy = Type::getInt8PtrTy(Ptr->getContext());
  Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
  Type *Int32PtrTy = Int32Ty->getPointerTo();
  Type *Int64Ty = Type::getInt64Ty(Ptr->getContext());

  // getSExtValue() requires the value to fit in 64 bits.
  auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset);
  if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64)
    return nullptr;

  // Keep the offset signed. Storing it in an unsigned variable made the
  // division below unsigned, so a negative offset produced a huge positive
  // element index that only reached the right address through wraparound.
  int64_t OffsetInt = OffsetConstInt->getSExtValue();
  if (OffsetInt % 4 != 0)
    return nullptr;

  // Address the loaded slot as element (Offset / 4) of an i32 array at Ptr.
  Constant *C = ConstantExpr::getGetElementPtr(
      Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy),
      ConstantInt::get(Int64Ty, OffsetInt / 4, /*isSigned=*/true));
  Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL);
  if (!Loaded)
    return nullptr;

  auto *LoadedCE = dyn_cast<ConstantExpr>(Loaded);
  if (!LoadedCE)
    return nullptr;

  // Look through an optional trunc around the subtraction.
  if (LoadedCE->getOpcode() == Instruction::Trunc) {
    LoadedCE = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0));
    if (!LoadedCE)
      return nullptr;
  }

  if (LoadedCE->getOpcode() != Instruction::Sub)
    return nullptr;

  // The minuend must be `ptrtoint X`; X is the value the call folds to.
  auto *LoadedLHS = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0));
  if (!LoadedLHS || LoadedLHS->getOpcode() != Instruction::PtrToInt)
    return nullptr;
  auto *LoadedLHSPtr = LoadedLHS->getOperand(0);

  // The subtrahend must denote exactly the same global + offset as Ptr.
  Constant *LoadedRHS = LoadedCE->getOperand(1);
  GlobalValue *LoadedRHSSym;
  APInt LoadedRHSOffset;
  if (!IsConstantOffsetFromGlobal(LoadedRHS, LoadedRHSSym, LoadedRHSOffset,
                                  DL) ||
      PtrSym != LoadedRHSSym || PtrOffset != LoadedRHSOffset)
    return nullptr;

  return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
}
|
||||
|
||||
template <typename IterTy>
|
||||
static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
|
||||
const Query &Q, unsigned MaxRecurse) {
|
||||
|
@ -3865,6 +3921,11 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
|
|||
if (match(RHS, m_Undef()))
|
||||
return Constant::getNullValue(ReturnType);
|
||||
}
|
||||
|
||||
if (IID == Intrinsic::load_relative && isa<Constant>(LHS) &&
|
||||
isa<Constant>(RHS))
|
||||
return SimplifyRelativeLoad(cast<Constant>(LHS), cast<Constant>(RHS),
|
||||
Q.DL);
|
||||
}
|
||||
|
||||
// Perform idempotent optimizations
|
||||
|
|
|
@ -87,6 +87,7 @@ add_llvm_library(LLVMCodeGen
|
|||
PHIEliminationUtils.cpp
|
||||
PostRAHazardRecognizer.cpp
|
||||
PostRASchedulerList.cpp
|
||||
PreISelIntrinsicLowering.cpp
|
||||
ProcessImplicitDefs.cpp
|
||||
PrologEpilogInserter.cpp
|
||||
PseudoSourceValue.cpp
|
||||
|
|
|
@ -63,6 +63,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
|
|||
initializePostMachineSchedulerPass(Registry);
|
||||
initializePostRAHazardRecognizerPass(Registry);
|
||||
initializePostRASchedulerPass(Registry);
|
||||
initializePreISelIntrinsicLoweringPass(Registry);
|
||||
initializeProcessImplicitDefsPass(Registry);
|
||||
initializeRegisterCoalescerPass(Registry);
|
||||
initializeShrinkWrapPass(Registry);
|
||||
|
|
|
@ -102,6 +102,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
|
|||
if (TM->Options.EmulatedTLS)
|
||||
PM.add(createLowerEmuTLSPass(TM));
|
||||
|
||||
PM.add(createPreISelIntrinsicLoweringPass());
|
||||
|
||||
// Add internal analysis passes from the target machine.
|
||||
PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
|
||||
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass implements IR lowering for the llvm.load.relative intrinsic.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Expand every direct call to \p F (an llvm.load.relative declaration) into
/// explicit IR inserted before the call:
///   1. an i8 GEP of the pointer argument by the offset argument,
///   2. a bitcast of that address to i32*,
///   3. a 4-byte-aligned i32 load,
///   4. an i8 GEP of the pointer argument by the loaded value.
/// The call's uses are redirected to the final GEP and the call is erased.
///
/// \returns true if at least one call was rewritten.
bool lowerLoadRelative(Function &F) {
  if (F.use_empty())
    return false;

  auto &Ctx = F.getContext();
  Type *I8Ty = Type::getInt8Ty(Ctx);
  Type *I32Ty = Type::getInt32Ty(Ctx);
  Type *I32PtrTy = I32Ty->getPointerTo();

  bool MadeChange = false;
  auto UseIt = F.use_begin();
  auto UseEnd = F.use_end();
  while (UseIt != UseEnd) {
    auto *Call = dyn_cast<CallInst>(UseIt->getUser());
    // Step past this use before the call (and with it the use) is erased.
    ++UseIt;

    // Only direct calls are lowered; any other use of F is left alone.
    if (!Call || Call->getCalledValue() != &F)
      continue;

    Value *Base = Call->getArgOperand(0);
    Value *Offset = Call->getArgOperand(1);

    IRBuilder<> Builder(Call);
    Value *SlotAddr = Builder.CreateGEP(I8Ty, Base, Offset);
    Value *SlotAddrI32 = Builder.CreateBitCast(SlotAddr, I32PtrTy);
    Value *RelOffset = Builder.CreateAlignedLoad(SlotAddrI32, 4);
    Value *Target = Builder.CreateGEP(I8Ty, Base, RelOffset);

    Call->replaceAllUsesWith(Target);
    Call->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}
|
||||
|
||||
/// Run lowerLoadRelative on every function in \p M whose name starts with
/// "llvm.load.relative.".
///
/// \returns true if any call was lowered.
bool lowerIntrinsics(Module &M) {
  bool AnyLowered = false;
  for (Function &Fn : M) {
    if (!Fn.getName().startswith("llvm.load.relative."))
      continue;
    if (lowerLoadRelative(Fn))
      AnyLowered = true;
  }
  return AnyLowered;
}
|
||||
|
||||
class PreISelIntrinsicLowering : public ModulePass {
|
||||
public:
|
||||
static char ID;
|
||||
PreISelIntrinsicLowering() : ModulePass(ID) {}
|
||||
|
||||
bool runOnModule(Module &M) {
|
||||
return lowerIntrinsics(M);
|
||||
}
|
||||
};
|
||||
|
||||
char PreISelIntrinsicLowering::ID;
|
||||
|
||||
}
|
||||
|
||||
INITIALIZE_PASS(PreISelIntrinsicLowering, "pre-isel-intrinsic-lowering",
|
||||
"Pre-ISel Intrinsic Lowering", false, false)
|
||||
|
||||
/// Allocate a fresh PreISelIntrinsicLowering pass and return it to the caller.
ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
  ModulePass *LoweringPass = new PreISelIntrinsicLowering();
  return LoweringPass;
}
|
|
@ -6,6 +6,6 @@
|
|||
; STOP-NEXT: Machine Function Analysis
|
||||
; STOP-NEXT: MIR Printing Pass
|
||||
|
||||
; START: -machine-branch-prob -gc-lowering
|
||||
; START: -machine-branch-prob -pre-isel-intrinsic-lowering
|
||||
; START: FunctionPass Manager
|
||||
; START-NEXT: Lower Garbage Collection Instructions
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
; RUN: opt < %s -instsimplify -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
|
||||
target triple = "i386-unknown-linux-gnu"
|
||||
|
||||
@a = external global i8
|
||||
|
||||
@c1 = constant [3 x i32] [i32 0, i32 0,
|
||||
i32 sub (i32 ptrtoint (i8* @a to i32), i32 ptrtoint (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i32))
|
||||
]
|
||||
|
||||
; CHECK: @f1
|
||||
define i8* @f1() {
|
||||
; CHECK: ret i8* @a
|
||||
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i8*), i32 0)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
declare i8* @llvm.load.relative.i32(i8*, i32)
|
|
@ -0,0 +1,75 @@
|
|||
; RUN: opt < %s -instsimplify -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@a = external global i8
|
||||
@b = external global i8
|
||||
|
||||
@c1 = constant i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* @c1 to i64)) to i32)
|
||||
@c2 = constant [7 x i32] [i32 0, i32 0,
|
||||
i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
|
||||
i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
|
||||
i32 trunc (i64 add (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
|
||||
i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 1) to i32),
|
||||
i32 trunc (i64 sub (i64 0, i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32)
|
||||
]
|
||||
|
||||
; CHECK: @f1
|
||||
define i8* @f1() {
|
||||
; CHECK: ret i8* @a
|
||||
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* @c1 to i8*), i32 0)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: @f2
|
||||
define i8* @f2() {
|
||||
; CHECK: ret i8* @a
|
||||
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 0)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: @f3
|
||||
define i8* @f3() {
|
||||
; CHECK: ret i8* @b
|
||||
%l = call i8* @llvm.load.relative.i64(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i64 4)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: @f4
|
||||
define i8* @f4() {
|
||||
; CHECK: ret i8* %
|
||||
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 1)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: @f5
|
||||
define i8* @f5() {
|
||||
; CHECK: ret i8* %
|
||||
%l = call i8* @llvm.load.relative.i32(i8* zeroinitializer, i32 0)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: @f6
|
||||
define i8* @f6() {
|
||||
; CHECK: ret i8* %
|
||||
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 8)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: @f7
|
||||
define i8* @f7() {
|
||||
; CHECK: ret i8* %
|
||||
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 12)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: @f8
|
||||
define i8* @f8() {
|
||||
; CHECK: ret i8* %
|
||||
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 16)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
declare i8* @llvm.load.relative.i32(i8*, i32)
|
||||
declare i8* @llvm.load.relative.i64(i8*, i64)
|
|
@ -0,0 +1,26 @@
|
|||
; RUN: opt -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
|
||||
|
||||
; CHECK: define i8* @foo32(i8* [[P:%.*]], i32 [[O:%.*]])
|
||||
define i8* @foo32(i8* %p, i32 %o) {
|
||||
; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i32 [[O]]
|
||||
; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
|
||||
; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
|
||||
; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
|
||||
; CHECK: ret i8* [[R]]
|
||||
%l = call i8* @llvm.load.relative.i32(i8* %p, i32 %o)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
; CHECK: define i8* @foo64(i8* [[P:%.*]], i64 [[O:%.*]])
|
||||
define i8* @foo64(i8* %p, i64 %o) {
|
||||
; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i64 [[O]]
|
||||
; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
|
||||
; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
|
||||
; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
|
||||
; CHECK: ret i8* [[R]]
|
||||
%l = call i8* @llvm.load.relative.i64(i8* %p, i64 %o)
|
||||
ret i8* %l
|
||||
}
|
||||
|
||||
declare i8* @llvm.load.relative.i32(i8*, i32)
|
||||
declare i8* @llvm.load.relative.i64(i8*, i64)
|
|
@ -353,6 +353,7 @@ int main(int argc, char **argv) {
|
|||
initializeDwarfEHPreparePass(Registry);
|
||||
initializeSafeStackPass(Registry);
|
||||
initializeSjLjEHPreparePass(Registry);
|
||||
initializePreISelIntrinsicLoweringPass(Registry);
|
||||
|
||||
#ifdef LINK_POLLY_INTO_TOOLS
|
||||
polly::initializePollyPasses(Registry);
|
||||
|
|
Loading…
Reference in New Issue