[Attributor] Pointer privatization attribute (argument promotion)

A pointer is privatizable if it can be replaced by a new, private one.
Privatizing a pointer reduces its use count and the interaction between
unrelated code parts. This is a first step towards replacing argument promotion.
While we can already handle recursion (unlike argument promotion!) we
are restricted to stack allocations for now because we do not analyze
the uses in the callee.

Reviewed By: uenoku

Differential Revision: https://reviews.llvm.org/D68852
This commit is contained in:
Johannes Doerfert 2019-10-30 17:20:20 -05:00
parent f0654875fb
commit 89c2e733e8
21 changed files with 1107 additions and 103 deletions

View File

@ -14,6 +14,7 @@
#include "llvm/IR/PassManager.h" #include "llvm/IR/PassManager.h"
namespace llvm { namespace llvm {
class TargetTransformInfo;
/// Argument promotion pass. /// Argument promotion pass.
/// ///
@ -26,6 +27,17 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
public: public:
ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {} ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
/// Check if callers and the callee \p F agree how promoted arguments would be
/// passed. The ones that they do not agree on are eliminated from the sets but
/// the return value has to be observed as well.
static bool areFunctionArgsABICompatible(
const Function &F, const TargetTransformInfo &TTI,
SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform);
/// Checks if a type could have padding bytes.
static bool isDenselyPacked(Type *type, const DataLayout &DL);
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR); LazyCallGraph &CG, CGSCCUpdateResult &UR);
}; };

View File

@ -104,6 +104,7 @@
#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h" #include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h" #include "llvm/IR/ConstantRange.h"
#include "llvm/IR/PassManager.h" #include "llvm/IR/PassManager.h"
@ -951,6 +952,16 @@ struct Attributor {
friend struct Attributor; friend struct Attributor;
}; };
/// Check if we can rewrite a function signature.
///
/// The argument \p Arg is replaced with new ones defined by the number,
/// order, and types in \p ReplacementTypes.
///
/// \returns True, if the replacement can be registered, via
/// registerFunctionSignatureRewrite, false otherwise.
bool isValidFunctionSignatureRewrite(Argument &Arg,
ArrayRef<Type *> ReplacementTypes);
/// Register a rewrite for a function signature. /// Register a rewrite for a function signature.
/// ///
/// The argument \p Arg is replaced with new ones defined by the number, /// The argument \p Arg is replaced with new ones defined by the number,
@ -2402,6 +2413,45 @@ struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute>,
static const char ID; static const char ID;
}; };
/// An abstract interface for privatizability.
///
/// A pointer is privatizable if it can be replaced by a new, private one.
/// Privatizing a pointer reduces its use count and the interaction between
/// unrelated code parts.
///
/// In order for a pointer to be privatizable its value cannot be observed
/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
/// what values are necessary to make the private copy look like the original
/// one, and the values we need can be loaded (=dereferenceable).
struct AAPrivatizablePtr : public StateWrapper<BooleanState, AbstractAttribute>,
                           public IRPosition {
  AAPrivatizablePtr(const IRPosition &IRP) : IRPosition(IRP) {}

  /// Returns true if pointer privatization is assumed to be possible.
  bool isAssumedPrivatizablePtr() const { return getAssumed(); }

  /// Returns true if pointer privatization is known to be possible.
  bool isKnownPrivatizablePtr() const { return getKnown(); }

  /// Return the type we can choose for a private copy of the underlying
  /// value. None means it is not clear yet, nullptr means there is none.
  virtual Optional<Type *> getPrivatizableType() const = 0;

  /// Return an IR position, see struct IRPosition.
  ///
  ///{
  IRPosition &getIRPosition() { return *this; }
  const IRPosition &getIRPosition() const { return *this; }
  ///}

  /// Create an abstract attribute view for the position \p IRP.
  static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
                                              Attributor &A);

  /// Unique ID (due to the unique address)
  static const char ID;
};
/// An abstract interface for all memory related attributes. /// An abstract interface for all memory related attributes.
struct AAMemoryBehavior struct AAMemoryBehavior
: public IRAttribute< : public IRAttribute<

View File

@ -774,8 +774,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
return true; return true;
} }
/// Checks if a type could have padding bytes. bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
static bool isDenselyPacked(Type *type, const DataLayout &DL) {
// There is no size information, so be conservative. // There is no size information, so be conservative.
if (!type->isSized()) if (!type->isSized())
return false; return false;
@ -844,12 +843,14 @@ static bool canPaddingBeAccessed(Argument *arg) {
return false; return false;
} }
static bool areFunctionArgsABICompatible( bool ArgumentPromotionPass::areFunctionArgsABICompatible(
const Function &F, const TargetTransformInfo &TTI, const Function &F, const TargetTransformInfo &TTI,
SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform) { SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
for (const Use &U : F.uses()) { for (const Use &U : F.uses()) {
CallSite CS(U.getUser()); CallSite CS(U.getUser());
if (!CS)
return false;
const Function *Caller = CS.getCaller(); const Function *Caller = CS.getCaller();
const Function *Callee = CS.getCalledFunction(); const Function *Callee = CS.getCalledFunction();
if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) || if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
@ -951,9 +952,9 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// If this is a byval argument, and if the aggregate type is small, just // If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe, if the passed value is densely // pass the elements, which is always safe, if the passed value is densely
// packed or if we can prove the padding bytes are never accessed. // packed or if we can prove the padding bytes are never accessed.
bool isSafeToPromote = bool isSafeToPromote = PtrArg->hasByValAttr() &&
PtrArg->hasByValAttr() && (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); !canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) { if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (MaxElements > 0 && STy->getNumElements() > MaxElements) { if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
@ -1011,8 +1012,8 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr; return nullptr;
if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote, if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
ByValArgsToTransform)) *F, TTI, ArgsToPromote, ByValArgsToTransform))
return nullptr; return nullptr;
return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);

View File

@ -31,13 +31,16 @@
#include "llvm/IR/Argument.h" #include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h" #include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h" #include "llvm/IR/CFG.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h" #include "llvm/InitializePasses.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/Local.h"
@ -130,6 +133,7 @@ PIPE_OPERATOR(AAHeapToStack)
PIPE_OPERATOR(AAReachability) PIPE_OPERATOR(AAReachability)
PIPE_OPERATOR(AAMemoryBehavior) PIPE_OPERATOR(AAMemoryBehavior)
PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAValueConstantRange)
PIPE_OPERATOR(AAPrivatizablePtr)
#undef PIPE_OPERATOR #undef PIPE_OPERATOR
} // namespace llvm } // namespace llvm
@ -309,6 +313,75 @@ static const Value *getPointerOperand(const Instruction *I,
return nullptr; return nullptr;
} }
/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and
/// advanced by \p Offset bytes. To aid later analysis the method tries to build
/// getelement pointer instructions that traverse the natural type of \p Ptr if
/// possible. If that fails, the remaining offset is adjusted byte-wise, hence
/// through a cast to i8*.
///
/// TODO: This could probably live somewhere more prominently if it doesn't
///       already exist.
static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset,
                               IRBuilder<NoFolder> &IRB, const DataLayout &DL) {
  assert(Offset >= 0 && "Negative offset not supported yet!");
  LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
                    << "-bytes as " << *ResTy << "\n");

  // The initial type we are trying to traverse to get nice GEPs.
  Type *Ty = Ptr->getType();

  SmallVector<Value *, 4> Indices;
  std::string GEPName = Ptr->getName().str();
  while (Offset) {
    uint64_t Idx, Rem;

    if (auto *STy = dyn_cast<StructType>(Ty)) {
      const StructLayout *SL = DL.getStructLayout(STy);
      // Offsets beyond the struct size are handled byte-wise below.
      if (int64_t(SL->getSizeInBytes()) < Offset)
        break;
      Idx = SL->getElementContainingOffset(Offset);
      assert(Idx < STy->getNumElements() && "Offset calculation error!");
      Rem = Offset - SL->getElementOffset(Idx);
      Ty = STy->getElementType(Idx);
    } else if (auto *PTy = dyn_cast<PointerType>(Ty)) {
      Ty = PTy->getElementType();
      if (!Ty->isSized())
        break;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      assert(ElementSize && "Expected type with size!");
      Idx = Offset / ElementSize;
      Rem = Offset % ElementSize;
    } else {
      // Non-aggregate type, we cast and make byte-wise progress now.
      break;
    }

    LLVM_DEBUG(dbgs() << "Ty: " << *Ty << " Offset: " << Offset
                      << " Idx: " << Idx << " Rem: " << Rem << "\n");

    GEPName += "." + std::to_string(Idx);
    Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
    Offset = Rem;
  }

  // Create a GEP if we collected indices above.
  if (Indices.size())
    Ptr = IRB.CreateGEP(Ptr, Indices, GEPName);

  // If an offset is left we use byte-wise adjustment.
  if (Offset) {
    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
    Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset),
                        GEPName + ".b" + Twine(Offset));
  }

  // Ensure the result has the requested type.
  Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");

  LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
  return Ptr;
}
/// Recursively visit all values that might become \p IRP at some point. This /// Recursively visit all values that might become \p IRP at some point. This
/// will be done by looking through cast instructions, selects, phis, and calls /// will be done by looking through cast instructions, selects, phis, and calls
/// with the "returned" attribute. Once we cannot look through the value any /// with the "returned" attribute. Once we cannot look through the value any
@ -2796,7 +2869,7 @@ struct AAIsDeadArgument : public AAIsDeadFloating {
ChangeStatus manifest(Attributor &A) override { ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = AAIsDeadFloating::manifest(A); ChangeStatus Changed = AAIsDeadFloating::manifest(A);
Argument &Arg = *getAssociatedArgument(); Argument &Arg = *getAssociatedArgument();
if (Arg.getParent()->hasLocalLinkage()) if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {}))
if (A.registerFunctionSignatureRewrite( if (A.registerFunctionSignatureRewrite(
Arg, /* ReplacementTypes */ {}, Arg, /* ReplacementTypes */ {},
Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{}, Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
@ -4761,6 +4834,575 @@ struct AAHeapToStackFunction final : public AAHeapToStackImpl {
} }
}; };
/// ----------------------- Privatizable Pointers ------------------------------

/// Common base for all AAPrivatizablePtr implementations. It stores the
/// (optional) privatizable type shared by the concrete position classes.
struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
  AAPrivatizablePtrImpl(const IRPosition &IRP)
      : AAPrivatizablePtr(IRP), PrivatizableType(llvm::None) {}

  /// On a pessimistic fixpoint also record that no privatizable type exists.
  ChangeStatus indicatePessimisticFixpoint() override {
    AAPrivatizablePtr::indicatePessimisticFixpoint();
    PrivatizableType = nullptr;
    return ChangeStatus::CHANGED;
  }

  /// Identify the type we can choose for a private copy of the underlying
  /// argument. None means it is not clear yet, nullptr means there is none.
  virtual Optional<Type *> identifyPrivatizableType(Attributor &A) = 0;

  /// Return a privatizable type that encloses both T0 and T1.
  /// TODO: This is merely a stub for now as we should manage a mapping as well.
  Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
    if (!T0.hasValue())
      return T1;
    if (!T1.hasValue())
      return T0;
    if (T0 == T1)
      return T0;
    // Two distinct known types do not combine; there is no privatizable type.
    return nullptr;
  }

  /// See AAPrivatizablePtr::getPrivatizableType().
  Optional<Type *> getPrivatizableType() const override {
    return PrivatizableType;
  }

  /// See AbstractAttribute::getAsStr().
  const std::string getAsStr() const override {
    return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]";
  }

protected:
  // None: undecided yet; nullptr: not privatizable; otherwise the chosen type.
  Optional<Type *> PrivatizableType;
};
// TODO: Do this for call site arguments (probably also other values) as well.
struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
  AAPrivatizablePtrArgument(const IRPosition &IRP)
      : AAPrivatizablePtrImpl(IRP) {}

  /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
  Optional<Type *> identifyPrivatizableType(Attributor &A) override {
    // If this is a byval argument and we know all the call sites (so we can
    // rewrite them), there is no need to check them explicitly.
    if (getIRPosition().hasAttr(Attribute::ByVal) &&
        A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
                               true))
      return getAssociatedValue().getType()->getPointerElementType();

    // Type agreed upon by all call sites seen so far; None until the first.
    Optional<Type *> Ty;
    unsigned ArgNo = getIRPosition().getArgNo();

    // Make sure the associated call site argument has the same type at all call
    // sites and it is an allocation we know is safe to privatize, for now that
    // means we only allow alloca instructions.
    // TODO: We can additionally analyze the accesses in the callee to create
    //       the type from that information instead. That is a little more
    //       involved and will be done in a follow up patch.
    auto CallSiteCheck = [&](AbstractCallSite ACS) {
      IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
      // Check if a corresponding argument was found or if it is one not
      // associated (which can happen for callback calls).
      if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
        return false;

      // Check that all call sites agree on a type.
      auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
      Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();

      LLVM_DEBUG({
        dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
        if (CSTy.hasValue() && CSTy.getValue())
          CSTy.getValue()->print(dbgs());
        else if (CSTy.hasValue())
          dbgs() << "<nullptr>";
        else
          dbgs() << "<none>";
      });

      Ty = combineTypes(Ty, CSTy);

      LLVM_DEBUG({
        dbgs() << " : New Type: ";
        if (Ty.hasValue() && Ty.getValue())
          Ty.getValue()->print(dbgs());
        else if (Ty.hasValue())
          dbgs() << "<nullptr>";
        else
          dbgs() << "<none>";
        dbgs() << "\n";
      });

      // Fail only if the combination established there is no common type.
      return !Ty.hasValue() || Ty.getValue();
    };

    // All call sites must be known and must agree on a type.
    if (!A.checkForAllCallSites(CallSiteCheck, *this, true))
      return nullptr;
    return Ty;
  }

  /// See AbstractAttribute::updateImpl(...).
  ChangeStatus updateImpl(Attributor &A) override {
    PrivatizableType = identifyPrivatizableType(A);
    if (!PrivatizableType.hasValue())
      return ChangeStatus::UNCHANGED;
    if (!PrivatizableType.getValue())
      return indicatePessimisticFixpoint();

    // Avoid arguments with padding for now.
    if (!getIRPosition().hasAttr(Attribute::ByVal) &&
        !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
                                                A.getInfoCache().getDL())) {
      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
      return indicatePessimisticFixpoint();
    }

    // Verify callee and caller agree on how the promoted argument would be
    // passed.
    // TODO: The use of the ArgumentPromotion interface here is ugly, we need a
    // specialized form of TargetTransformInfo::areFunctionArgsABICompatible
    // which doesn't require the arguments ArgumentPromotion wanted to pass.
    Function &Fn = *getIRPosition().getAnchorScope();
    SmallPtrSet<Argument *, 1> ArgsToPromote, Dummy;
    ArgsToPromote.insert(getAssociatedArgument());
    const auto *TTI =
        A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(Fn);
    if (!TTI ||
        !ArgumentPromotionPass::areFunctionArgsABICompatible(
            Fn, *TTI, ArgsToPromote, Dummy) ||
        ArgsToPromote.empty()) {
      LLVM_DEBUG(
          dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for "
                 << Fn.getName() << "\n");
      return indicatePessimisticFixpoint();
    }

    // Collect the types that will replace the privatizable type in the function
    // signature.
    SmallVector<Type *, 16> ReplacementTypes;
    identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);

    // Register a rewrite of the argument.
    Argument *Arg = getAssociatedArgument();
    if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) {
      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Rewrite not valid\n");
      return indicatePessimisticFixpoint();
    }

    unsigned ArgNo = Arg->getArgNo();

    // Helper to check if for the given call site the associated argument is
    // passed to a callback where the privatization would be different.
    auto IsCompatiblePrivArgOfCallback = [&](CallSite CS) {
      Value *CSArgOp = CS.getArgOperand(ArgNo);

      SmallVector<const Use *, 4> CBUses;
      AbstractCallSite::getCallbackUses(CS, CBUses);
      for (const Use *U : CBUses) {
        AbstractCallSite CBACS(U);
        assert(CBACS && CBACS.isCallbackCall());
        for (Argument &CBArg : CBACS.getCalledFunction()->args()) {
          int CBArgNo = CBACS.getCallArgOperandNo(CBArg);

          LLVM_DEBUG({
            dbgs()
                << "[AAPrivatizablePtr] Argument " << *Arg
                << "check if can be privatized in the context of its parent ("
                << Arg->getParent()->getName()
                << ")\n[AAPrivatizablePtr] because it is an argument in a "
                   "callback ("
                << CBArgNo << "@" << CBACS.getCalledFunction()->getName()
                << ")\n[AAPrivatizablePtr] " << CBArg << " : "
                << CBACS.getCallArgOperand(CBArg) << " vs " << CSArgOp << "\n"
                << "[AAPrivatizablePtr] " << CBArg << " : "
                << CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n";
          });

          // Only the callback argument at the same position matters.
          if (CBArgNo != int(ArgNo))
            continue;
          const auto &CBArgPrivAA =
              A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(CBArg));
          if (CBArgPrivAA.isValidState()) {
            auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
            if (!CBArgPrivTy.hasValue())
              continue;
            if (CBArgPrivTy.getValue() == PrivatizableType)
              continue;
          }

          LLVM_DEBUG({
            dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
                   << " cannot be privatized in the context of its parent ("
                   << Arg->getParent()->getName()
                   << ")\n[AAPrivatizablePtr] because it is an argument in a "
                      "callback ("
                   << CBArgNo << "@" << CBACS.getCalledFunction()->getName()
                   << ").\n[AAPrivatizablePtr] for which the argument "
                      "privatization is not compatible.\n";
          });
          return false;
        }
      }
      return true;
    };

    // Helper to check if for the given call site the associated argument is
    // passed to a direct call where the privatization would be different.
    auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) {
      CallBase *DC = cast<CallBase>(ACS.getInstruction());
      int DCArgNo = ACS.getCallArgOperandNo(ArgNo);
      assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->getNumArgOperands() &&
             "Expected a direct call operand for callback call operand");

      LLVM_DEBUG({
        dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
               << " check if be privatized in the context of its parent ("
               << Arg->getParent()->getName()
               << ")\n[AAPrivatizablePtr] because it is an argument in a "
                  "direct call of ("
               << DCArgNo << "@" << DC->getCalledFunction()->getName()
               << ").\n";
      });

      Function *DCCallee = DC->getCalledFunction();
      if (unsigned(DCArgNo) < DCCallee->arg_size()) {
        const auto &DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
            *this, IRPosition::argument(*DCCallee->getArg(DCArgNo)));
        if (DCArgPrivAA.isValidState()) {
          auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
          if (!DCArgPrivTy.hasValue())
            return true;
          if (DCArgPrivTy.getValue() == PrivatizableType)
            return true;
        }
      }

      LLVM_DEBUG({
        dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
               << " cannot be privatized in the context of its parent ("
               << Arg->getParent()->getName()
               << ")\n[AAPrivatizablePtr] because it is an argument in a "
                  "direct call of ("
               << ACS.getCallSite().getCalledFunction()->getName()
               << ").\n[AAPrivatizablePtr] for which the argument "
                  "privatization is not compatible.\n";
      });
      return false;
    };

    // Helper to check if the associated argument is used at the given abstract
    // call site in a way that is incompatible with the privatization assumed
    // here.
    auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) {
      if (ACS.isDirectCall())
        return IsCompatiblePrivArgOfCallback(ACS.getCallSite());
      if (ACS.isCallbackCall())
        return IsCompatiblePrivArgOfDirectCS(ACS);
      return false;
    };

    if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this,
                                true))
      return indicatePessimisticFixpoint();

    return ChangeStatus::UNCHANGED;
  }

  /// Given a type to privatize \p PrivType, collect the constituents (which
  /// are used) in \p ReplacementTypes.
  static void
  identifyReplacementTypes(Type *PrivType,
                           SmallVectorImpl<Type *> &ReplacementTypes) {
    // TODO: For now we expand the privatization type to the fullest which can
    //       lead to dead arguments that need to be removed later.
    assert(PrivType && "Expected privatizable type!");

    // Traverse the type, extract constituent types on the outermost level.
    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++)
        ReplacementTypes.push_back(PrivStructType->getElementType(u));
    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
      ReplacementTypes.append(PrivArrayType->getNumElements(),
                              PrivArrayType->getElementType());
    } else {
      ReplacementTypes.push_back(PrivType);
    }
  }

  /// Initialize \p Base according to the type \p PrivType at position \p IP.
  /// The values needed are taken from the arguments of \p F starting at
  /// position \p ArgNo.
  static void createInitialization(Type *PrivType, Value &Base, Function &F,
                                   unsigned ArgNo, Instruction &IP) {
    assert(PrivType && "Expected privatizable type!");

    IRBuilder<NoFolder> IRB(&IP);
    const DataLayout &DL = F.getParent()->getDataLayout();

    // Traverse the type, build GEPs and stores.
    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
        Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo();
        Value *Ptr = constructPointer(
            PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL);
        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
      }
    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
      Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo();
      // NOTE(review): this takes the store size of the *pointer* type while
      // createReplacementValues uses the element type for the stride —
      // confirm which is intended.
      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy);
      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
        Value *Ptr =
            constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
      }
    } else {
      new StoreInst(F.getArg(ArgNo), &Base, &IP);
    }
  }

  /// Extract values from \p Base according to the type \p PrivType at the
  /// call position \p ACS. The values are appended to \p ReplacementValues.
  void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
                               Value *Base,
                               SmallVectorImpl<Value *> &ReplacementValues) {
    assert(Base && "Expected base value!");
    assert(PrivType && "Expected privatizable type!");
    Instruction *IP = ACS.getInstruction();

    IRBuilder<NoFolder> IRB(IP);
    const DataLayout &DL = IP->getModule()->getDataLayout();

    // Make sure the base pointer has the privatizable type before indexing.
    if (Base->getType()->getPointerElementType() != PrivType)
      Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
                                                 "", ACS.getInstruction());

    // TODO: Improve the alignment of the loads.
    // Traverse the type, build GEPs and loads.
    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
        Type *PointeeTy = PrivStructType->getElementType(u);
        Value *Ptr =
            constructPointer(PointeeTy->getPointerTo(), Base,
                             PrivStructLayout->getElementOffset(u), IRB, DL);
        LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
        L->setAlignment(MaybeAlign(1));
        ReplacementValues.push_back(L);
      }
    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
      Type *PointeeTy = PrivArrayType->getElementType();
      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
      Type *PointeePtrTy = PointeeTy->getPointerTo();
      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
        Value *Ptr =
            constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
        // NOTE(review): the loaded type here is the pointer type; the struct
        // branch above loads the element type — confirm this is intended.
        LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP);
        L->setAlignment(MaybeAlign(1));
        ReplacementValues.push_back(L);
      }
    } else {
      LoadInst *L = new LoadInst(PrivType, Base, "", IP);
      L->setAlignment(MaybeAlign(1));
      ReplacementValues.push_back(L);
    }
  }

  /// See AbstractAttribute::manifest(...)
  ChangeStatus manifest(Attributor &A) override {
    if (!PrivatizableType.hasValue())
      return ChangeStatus::UNCHANGED;
    assert(PrivatizableType.getValue() && "Expected privatizable type!");

    // Collect all tail calls in the function as we cannot allow new allocas to
    // escape into tail recursion.
    // TODO: Be smarter about new allocas escaping into tail calls.
    SmallVector<CallInst *, 16> TailCalls;
    if (!A.checkForAllInstructions(
            [&](Instruction &I) {
              CallInst &CI = cast<CallInst>(I);
              if (CI.isTailCall())
                TailCalls.push_back(&CI);
              return true;
            },
            *this, {Instruction::Call}))
      return ChangeStatus::UNCHANGED;

    Argument *Arg = getAssociatedArgument();

    // Callback to repair the associated function. A new alloca is placed at the
    // beginning and initialized with the values passed through arguments. The
    // new alloca replaces the use of the old pointer argument.
    Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB =
        [=](const Attributor::ArgumentReplacementInfo &ARI,
            Function &ReplacementFn, Function::arg_iterator ArgIt) {
          BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
          Instruction *IP = &*EntryBB.getFirstInsertionPt();
          auto *AI = new AllocaInst(PrivatizableType.getValue(), 0,
                                    Arg->getName() + ".priv", IP);
          createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
                               ArgIt->getArgNo(), *IP);
          Arg->replaceAllUsesWith(AI);

          // Tail calls could let the new alloca escape; disable them.
          for (CallInst *CI : TailCalls)
            CI->setTailCall(false);
        };

    // Callback to repair a call site of the associated function. The elements
    // of the privatizable type are loaded prior to the call and passed to the
    // new function version.
    Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
        [=](const Attributor::ArgumentReplacementInfo &ARI,
            AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
          createReplacementValues(
              PrivatizableType.getValue(), ACS,
              ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
              NewArgOperands);
        };

    // Collect the types that will replace the privatizable type in the function
    // signature.
    SmallVector<Type *, 16> ReplacementTypes;
    identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);

    // Register a rewrite of the argument.
    if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes,
                                           std::move(FnRepairCB),
                                           std::move(ACSRepairCB)))
      return ChangeStatus::CHANGED;
    return ChangeStatus::UNCHANGED;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {
    STATS_DECLTRACK_ARG_ATTR(privatizable_ptr);
  }
};
struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
AAPrivatizablePtrFloating(const IRPosition &IRP)
: AAPrivatizablePtrImpl(IRP) {}
/// See AbstractAttribute::initialize(...).
virtual void initialize(Attributor &A) override {
// TODO: We can privatize more than arguments.
indicatePessimisticFixpoint();
}
ChangeStatus updateImpl(Attributor &A) override {
llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::"
"updateImpl will not be called");
}
/// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
Optional<Type *> identifyPrivatizableType(Attributor &A) override {
Value *Obj =
GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL());
if (!Obj) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
return nullptr;
}
if (auto *AI = dyn_cast<AllocaInst>(Obj))
if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
if (CI->isOne())
return Obj->getType()->getPointerElementType();
if (auto *Arg = dyn_cast<Argument>(Obj)) {
auto &PrivArgAA =
A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(*Arg));
if (PrivArgAA.isAssumedPrivatizablePtr())
return Obj->getType()->getPointerElementType();
}
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
"alloca nor privatizable argument: "
<< *Obj << "!\n");
return nullptr;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr);
}
};
/// Privatizability attribute for a call site argument.
struct AAPrivatizablePtrCallSiteArgument final
    : public AAPrivatizablePtrFloating {
  AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP)
      : AAPrivatizablePtrFloating(IRP) {}

  /// See AbstractAttribute::initialize(...).
  void initialize(Attributor &A) override {
    // A byval call site argument is known privatizable right away.
    if (getIRPosition().hasAttr(Attribute::ByVal))
      indicateOptimisticFixpoint();
  }

  /// See AbstractAttribute::updateImpl(...).
  ChangeStatus updateImpl(Attributor &A) override {
    PrivatizableType = identifyPrivatizableType(A);
    if (!PrivatizableType.hasValue())
      return ChangeStatus::UNCHANGED;
    if (!PrivatizableType.getValue())
      return indicatePessimisticFixpoint();

    // The pointer must not be captured, must not alias, and must not be
    // written for privatization to be valid; see the interface comment on
    // AAPrivatizablePtr. Bail on the first property we cannot assume.
    const IRPosition &IRP = getIRPosition();
    auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
    if (!NoCaptureAA.isAssumedNoCapture()) {
      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
      return indicatePessimisticFixpoint();
    }

    auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
    if (!NoAliasAA.isAssumedNoAlias()) {
      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
      return indicatePessimisticFixpoint();
    }

    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, IRP);
    if (!MemBehaviorAA.isAssumedReadOnly()) {
      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
      return indicatePessimisticFixpoint();
    }

    return ChangeStatus::UNCHANGED;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {
    STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr);
  }
};
/// Privatizability attribute for a call site return value. Not supported;
/// fixed to the pessimistic state immediately.
struct AAPrivatizablePtrCallSiteReturned final
    : public AAPrivatizablePtrFloating {
  AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP)
      : AAPrivatizablePtrFloating(IRP) {}

  /// See AbstractAttribute::initialize(...).
  void initialize(Attributor &A) override {
    // TODO: We can privatize more than arguments.
    indicatePessimisticFixpoint();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {
    STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr);
  }
};
/// Privatizable-pointer attribute for function return positions.
///
/// Only arguments are currently privatized; a function return value is
/// therefore immediately fixed to the pessimistic (not privatizable) state.
struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
  AAPrivatizablePtrReturned(const IRPosition &IRP)
      : AAPrivatizablePtrFloating(IRP) {}

  /// See AbstractAttribute::initialize(...).
  void initialize(Attributor &A) override {
    // TODO: We can privatize more than arguments.
    indicatePessimisticFixpoint();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {
    STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
  }
};
/// -------------------- Memory Behavior Attributes ---------------------------- /// -------------------- Memory Behavior Attributes ----------------------------
/// Includes read-none, read-only, and write-only. /// Includes read-none, read-only, and write-only.
/// ---------------------------------------------------------------------------- /// ----------------------------------------------------------------------------
@ -6311,10 +6953,8 @@ ChangeStatus Attributor::run(Module &M) {
return ManifestChange; return ManifestChange;
} }
bool Attributor::registerFunctionSignatureRewrite( bool Attributor::isValidFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes, Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
auto CallSiteCanBeChanged = [](AbstractCallSite ACS) { auto CallSiteCanBeChanged = [](AbstractCallSite ACS) {
// Forbid must-tail calls for now. // Forbid must-tail calls for now.
@ -6360,8 +7000,22 @@ bool Attributor::registerFunctionSignatureRewrite(
return false; return false;
} }
return true;
}
bool Attributor::registerFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes,
ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
<< Arg.getParent()->getName() << " with "
<< ReplacementTypes.size() << " replacements\n");
assert(isValidFunctionSignatureRewrite(Arg, ReplacementTypes) &&
"Cannot register an invalid rewrite");
Function *Fn = Arg.getParent();
SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = ArgumentReplacementMap[Fn]; SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = ArgumentReplacementMap[Fn];
if (ARIs.size() == 0) if (ARIs.empty())
ARIs.resize(Fn->arg_size()); ARIs.resize(Fn->arg_size());
// If we have a replacement already with less than or equal new arguments, // If we have a replacement already with less than or equal new arguments,
@ -6377,6 +7031,10 @@ bool Attributor::registerFunctionSignatureRewrite(
if (ARI) if (ARI)
delete ARI; delete ARI;
LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
<< Arg.getParent()->getName() << " with "
<< ReplacementTypes.size() << " replacements\n");
// Remember the replacement. // Remember the replacement.
ARI = new ArgumentReplacementInfo(*this, Arg, ReplacementTypes, ARI = new ArgumentReplacementInfo(*this, Arg, ReplacementTypes,
std::move(CalleeRepairCB), std::move(CalleeRepairCB),
@ -6718,6 +7376,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every argument with pointer type might be marked nofree. // Every argument with pointer type might be marked nofree.
getOrCreateAAFor<AANoFree>(ArgPos); getOrCreateAAFor<AANoFree>(ArgPos);
// Every argument with pointer type might be privatizable (or promotable)
getOrCreateAAFor<AAPrivatizablePtr>(ArgPos);
} }
} }
@ -6972,6 +7633,7 @@ const char AAAlign::ID = 0;
const char AANoCapture::ID = 0; const char AANoCapture::ID = 0;
const char AAValueSimplify::ID = 0; const char AAValueSimplify::ID = 0;
const char AAHeapToStack::ID = 0; const char AAHeapToStack::ID = 0;
const char AAPrivatizablePtr::ID = 0;
const char AAMemoryBehavior::ID = 0; const char AAMemoryBehavior::ID = 0;
const char AAValueConstantRange::ID = 0; const char AAValueConstantRange::ID = 0;
@ -7076,6 +7738,7 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)

View File

@ -3,9 +3,11 @@
define internal i32 @deref(i32* %x) nounwind { define internal i32 @deref(i32* %x) nounwind {
; CHECK-LABEL: define {{[^@]+}}@deref ; CHECK-LABEL: define {{[^@]+}}@deref
; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_PRIV]], align 4
; CHECK-NEXT: ret i32 [[TMP2]] ; CHECK-NEXT: ret i32 [[TMP2]]
; ;
entry: entry:
@ -19,7 +21,8 @@ define i32 @f(i32 %x) {
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32 ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 ; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]])
; CHECK-NEXT: ret i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[TMP1]]
; ;
entry: entry:

View File

@ -45,9 +45,11 @@ bb:
define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@promote_avx2 ; CHECK-LABEL: define {{[^@]+}}@promote_avx2
; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) ; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>
; CHECK-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]]
; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 ; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -65,7 +67,8 @@ define void @promote(<4 x i64>* %arg) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 1
; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]])
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void

View File

@ -8,9 +8,11 @@ target triple = "x86_64-unknown-linux-gnu"
; This should promote ; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -28,7 +30,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
@ -47,9 +50,11 @@ bb:
; This should promote ; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -67,7 +72,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
@ -86,9 +92,11 @@ bb:
; This should promote ; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -106,7 +114,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
@ -125,9 +134,11 @@ bb:
; This should promote ; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -145,7 +156,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
@ -242,9 +254,11 @@ bb:
; This should promote ; This should promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -262,7 +276,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
@ -281,9 +296,11 @@ bb:
; This should promote ; This should promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
@ -301,7 +318,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void

View File

@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s
define void @f() {
; CHECK-LABEL: define {{[^@]+}}@f()
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 1
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 1
; CHECK-NEXT: call void @g(i32 [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 1
call void @g(i32* %a)
ret void
}
define internal void @g(i32* %a) {
; CHECK-LABEL: define {{[^@]+}}@g
; CHECK-SAME: (i32 [[TMP0:%.*]])
; CHECK-NEXT: [[A_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP0]], i32* [[A_PRIV]]
; CHECK-NEXT: [[AA:%.*]] = load i32, i32* [[A_PRIV]], align 1
; CHECK-NEXT: call void @z(i32 [[AA]])
; CHECK-NEXT: ret void
;
%aa = load i32, i32* %a, align 1
call void @z(i32 %aa)
ret void
}
declare void @z(i32)

View File

@ -6,13 +6,20 @@
; Don't drop 'byval' on %X here. ; Don't drop 'byval' on %X here.
define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]], i32 [[I:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[I:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]]
; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8
; CHECK-NEXT: store i32 0, i32* [[X]] ; CHECK-NEXT: store i32 0, i32* [[X_PRIV]]
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
entry: entry:
@ -36,7 +43,12 @@ define i32 @test(i32* %X) {
; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4
; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]], i32 zeroext 0) ; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 1
; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 zeroext 0)
; CHECK-NEXT: ret i32 0 ; CHECK-NEXT: ret i32 0
; ;
entry: entry:

View File

@ -4,9 +4,13 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
define internal i32 @test(i32* %X, i32* %Y) { define internal i32 @test(i32* %X, i32* %Y) {
; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-LABEL: define {{[^@]+}}@test
; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: [[Y_PRIV:%.*]] = alloca i32
; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[Y]], align 4 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[Y_PRIV]]
; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]]
; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4
; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[Y_PRIV]], align 4
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[C]] ; CHECK-NEXT: ret i32 [[C]]
; ;
@ -18,10 +22,14 @@ define internal i32 @test(i32* %X, i32* %Y) {
define internal i32 @caller(i32* %B) { define internal i32 @caller(i32* %B) {
; CHECK-LABEL: define {{[^@]+}}@caller ; CHECK-LABEL: define {{[^@]+}}@caller
; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]])
; CHECK-NEXT: [[B_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV]]
; CHECK-NEXT: [[A:%.*]] = alloca i32 ; CHECK-NEXT: [[A:%.*]] = alloca i32
; CHECK-NEXT: store i32 1, i32* [[A]], align 4 ; CHECK-NEXT: store i32 1, i32* [[A]], align 4
; CHECK-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 1
; CHECK-NEXT: [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]])
; CHECK-NEXT: ret i32 [[C]] ; CHECK-NEXT: ret i32 [[C]]
; ;
%A = alloca i32 %A = alloca i32
@ -34,7 +42,8 @@ define i32 @callercaller() {
; CHECK-LABEL: define {{[^@]+}}@callercaller() ; CHECK-LABEL: define {{[^@]+}}@callercaller()
; CHECK-NEXT: [[B:%.*]] = alloca i32 ; CHECK-NEXT: [[B:%.*]] = alloca i32
; CHECK-NEXT: store i32 2, i32* [[B]], align 4 ; CHECK-NEXT: store i32 2, i32* [[B]], align 4
; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 1
; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[X]] ; CHECK-NEXT: ret i32 [[X]]
; ;
%B = alloca i32 %B = alloca i32

View File

@ -5,13 +5,20 @@
define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]]
; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8
; CHECK-NEXT: store i32 0, i32* [[X]] ; CHECK-NEXT: store i32 0, i32* [[X_PRIV]]
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
entry: entry:
@ -33,7 +40,12 @@ define i32 @test(i32* %X) {
; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4
; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]]) ; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 1
; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]])
; CHECK-NEXT: ret i32 0 ; CHECK-NEXT: ret i32 0
; ;
entry: entry:

View File

@ -7,9 +7,14 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
define internal void @f(%struct.ss* byval %b) nounwind { define internal void @f(%struct.ss* byval %b) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8
@ -26,9 +31,14 @@ entry:
define internal void @g(%struct.ss* byval align 32 %b) nounwind { define internal void @g(%struct.ss* byval align 32 %b) nounwind {
; CHECK-LABEL: define {{[^@]+}}@g ; CHECK-LABEL: define {{[^@]+}}@g
; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32
@ -51,8 +61,16 @@ define i32 @main() nounwind {
; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4
; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]]) ; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
; CHECK-NEXT: call void @g(%struct.ss* noalias nocapture nofree nonnull readonly byval align 32 dereferenceable(12) [[S]]) ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]])
; CHECK-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 1
; CHECK-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 1
; CHECK-NEXT: call void @g(i32 [[TMP2]], i64 [[TMP3]])
; CHECK-NEXT: ret i32 0 ; CHECK-NEXT: ret i32 0
; ;
entry: entry:

View File

@ -5,12 +5,14 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
define internal i32 @callee(i1 %C, i32* %P) { define internal i32 @callee(i1 %C, i32* %P) {
; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-LABEL: define {{[^@]+}}@callee
; CHECK-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) ; CHECK-SAME: (i1 [[C:%.*]], i32 [[TMP0:%.*]])
; CHECK-NEXT: [[P_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP0]], i32* [[P_PRIV]]
; CHECK-NEXT: br label [[F:%.*]] ; CHECK-NEXT: br label [[F:%.*]]
; CHECK: T: ; CHECK: T:
; CHECK-NEXT: unreachable ; CHECK-NEXT: unreachable
; CHECK: F: ; CHECK: F:
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P_PRIV]], align 4
; CHECK-NEXT: ret i32 [[X]] ; CHECK-NEXT: ret i32 [[X]]
; ;
br i1 %C, label %T, label %F br i1 %C, label %T, label %F
@ -27,7 +29,8 @@ define i32 @foo() {
; CHECK-LABEL: define {{[^@]+}}@foo() ; CHECK-LABEL: define {{[^@]+}}@foo()
; CHECK-NEXT: [[A:%.*]] = alloca i32 ; CHECK-NEXT: [[A:%.*]] = alloca i32
; CHECK-NEXT: store i32 17, i32* [[A]], align 4 ; CHECK-NEXT: store i32 17, i32* [[A]], align 4
; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 1
; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[X]] ; CHECK-NEXT: ret i32 [[X]]
; ;
%A = alloca i32 ; <i32*> [#uses=2] %A = alloca i32 ; <i32*> [#uses=2]

View File

@ -15,7 +15,11 @@ target triple = "x86_64-unknown-linux-gnu"
define void @run() { define void @run() {
; CHECK-LABEL: define {{[^@]+}}@run() ; CHECK-LABEL: define {{[^@]+}}@run()
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nofree nonnull readonly align 8 dereferenceable(16) @a) ; CHECK-NEXT: [[A_CAST:%.*]] = bitcast %struct.Foo* @a to i32*
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_CAST]], align 1
; CHECK-NEXT: [[A_0_1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* @a, i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_0_1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @CaptureAStruct(i32 [[TMP0]], i64 [[TMP1]])
; CHECK-NEXT: unreachable ; CHECK-NEXT: unreachable
; ;
entry: entry:
@ -48,15 +52,20 @@ define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct ; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct
; CHECK-SAME: (%struct.Foo* noalias nofree nonnull byval align 8 dereferenceable(16) [[A:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]]
; CHECK-NEXT: [[A_PRIV_CAST:%.*]] = bitcast %struct.Foo* [[A_PRIV]] to i32*
; CHECK-NEXT: store i32 [[TMP0]], i32* [[A_PRIV_CAST]]
; CHECK-NEXT: [[A_PRIV_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i64 [[TMP1]], i64* [[A_PRIV_0_1]]
; CHECK-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* ; CHECK-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo*
; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop: ; CHECK: loop:
; CHECK-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi %struct.Foo* [ [[A_PRIV]], [[ENTRY]] ], [ [[TMP2]], [[LOOP]] ]
; CHECK-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8 ; CHECK-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8
; CHECK-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 ; CHECK-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i64 0
; CHECK-NEXT: br label [[LOOP]] ; CHECK-NEXT: br label [[LOOP]]
; ;
entry: entry:

View File

@ -19,10 +19,15 @@ define internal i32 @f(%struct.ss* inalloca %s) {
; ATTRIBUTOR-NEXT: ret i32 [[R]] ; ATTRIBUTOR-NEXT: ret i32 [[R]]
; ;
; GLOBALOPT_ATTRIBUTOR-LABEL: define {{[^@]+}}@f ; GLOBALOPT_ATTRIBUTOR-LABEL: define {{[^@]+}}@f
; GLOBALOPT_ATTRIBUTOR-SAME: (%struct.ss* noalias nocapture nofree nonnull readonly align 4 dereferenceable(8) [[S:%.*]]) unnamed_addr ; GLOBALOPT_ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) unnamed_addr
; GLOBALOPT_ATTRIBUTOR-NEXT: entry: ; GLOBALOPT_ATTRIBUTOR-NEXT: entry:
; GLOBALOPT_ATTRIBUTOR-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV_CAST:%.*]] = bitcast %struct.ss* [[S_PRIV]] to i32*
; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[S_PRIV_CAST]]
; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1
; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 [[TMP1]], i32* [[S_PRIV_0_1]]
; GLOBALOPT_ATTRIBUTOR-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 0
; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1
; GLOBALOPT_ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[F0]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[F0]], align 4
; GLOBALOPT_ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[F1]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[F1]], align 4
; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = add i32 [[A]], [[B]] ; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = add i32 [[A]], [[B]]
@ -55,7 +60,11 @@ define i32 @main() {
; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 1, i32* [[F0]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 1, i32* [[F0]], align 4
; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 2, i32* [[F1]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 2, i32* [[F1]], align 4
; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = call fastcc i32 @f(%struct.ss* noalias nocapture nofree nonnull readonly align 4 dereferenceable(8) [[S]]) ; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
; GLOBALOPT_ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; GLOBALOPT_ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[S_0_1]], align 1
; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 [[TMP0]], i32 [[TMP1]])
; GLOBALOPT_ATTRIBUTOR-NEXT: ret i32 [[R]] ; GLOBALOPT_ATTRIBUTOR-NEXT: ret i32 [[R]]
; ;
entry: entry:

View File

@ -8,7 +8,8 @@ define void @caller() #0 {
; CHECK-LABEL: define {{[^@]+}}@caller() ; CHECK-LABEL: define {{[^@]+}}@caller()
; CHECK-NEXT: [[X:%.*]] = alloca i32 ; CHECK-NEXT: [[X:%.*]] = alloca i32
; CHECK-NEXT: store i32 42, i32* [[X]], align 4 ; CHECK-NEXT: store i32 42, i32* [[X]], align 4
; CHECK-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 1
; CHECK-NEXT: call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
%x = alloca i32 %x = alloca i32
@ -19,8 +20,10 @@ define void @caller() #0 {
define internal void @promote_i32_ptr(i32* %xp) { define internal void @promote_i32_ptr(i32* %xp) {
; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr ; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr
; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]])
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4 ; CHECK-NEXT: [[XP_PRIV:%.*]] = alloca i32
; CHECK-NEXT: store i32 [[TMP0]], i32* [[XP_PRIV]]
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[XP_PRIV]], align 4
; CHECK-NEXT: call void @use_i32(i32 [[X]]) ; CHECK-NEXT: call void @use_i32(i32 [[X]])
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;

View File

@ -10,8 +10,13 @@ declare i8* @foo(%pair*)
define internal void @bar(%pair* byval %Data) { define internal void @bar(%pair* byval %Data) {
; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: (%pair* noalias byval [[DATA:%.*]]) ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @foo(%pair* [[DATA]]) ; CHECK-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
; CHECK-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
; CHECK-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]]
; CHECK-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]])
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
tail call i8* @foo(%pair* %Data) tail call i8* @foo(%pair* %Data)
@ -21,7 +26,11 @@ define internal void @bar(%pair* byval %Data) {
define void @zed(%pair* byval %Data) { define void @zed(%pair* byval %Data) {
; CHECK-LABEL: define {{[^@]+}}@zed ; CHECK-LABEL: define {{[^@]+}}@zed
; CHECK-SAME: (%pair* noalias nocapture readonly byval [[DATA:%.*]]) ; CHECK-SAME: (%pair* noalias nocapture readonly byval [[DATA:%.*]])
; CHECK-NEXT: call void @bar(%pair* noalias nocapture readonly byval [[DATA]]) ; CHECK-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32*
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1
; CHECK-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1
; CHECK-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]])
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
call void @bar(%pair* byval %Data) call void @bar(%pair* byval %Data)

View File

@ -6,12 +6,17 @@
@mystr = internal global %struct.MYstr zeroinitializer ; <%struct.MYstr*> [#uses=3] @mystr = internal global %struct.MYstr zeroinitializer ; <%struct.MYstr*> [#uses=3]
define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind { define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind {
; CHECK-LABEL: define {{[^@]+}}@vfu1 ; CHECK-LABEL: define {{[^@]+}}@vfu1
; CHECK-SAME: (%struct.MYstr* noalias nocapture nofree nonnull writeonly byval align 8 dereferenceable(8) [[U:%.*]]) ; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1 ; CHECK-NEXT: [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
; CHECK-NEXT: store i32 99, i32* [[TMP0]], align 4 ; CHECK-NEXT: [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 0 ; CHECK-NEXT: store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
; CHECK-NEXT: store i8 97, i8* [[TMP1]], align 8 ; CHECK-NEXT: [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i32 99, i32* [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
; CHECK-NEXT: store i8 97, i8* [[TMP3]], align 8
; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: return: ; CHECK: return:
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
@ -29,15 +34,20 @@ return: ; preds = %entry
define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly { define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly {
; CHECK-LABEL: define {{[^@]+}}@vfu2 ; CHECK-LABEL: define {{[^@]+}}@vfu2
; CHECK-SAME: (%struct.MYstr* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(8) [[U:%.*]]) ; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 ; CHECK-NEXT: [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]] ; CHECK-NEXT: [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0 ; CHECK-NEXT: store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 8 ; CHECK-NEXT: [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
; CHECK-NEXT: ret i32 [[TMP5]] ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0
; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
; CHECK-NEXT: ret i32 [[TMP7]]
; ;
entry: entry:
%0 = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1] %0 = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1]
@ -52,8 +62,16 @@ entry:
define i32 @unions() nounwind { define i32 @unions() nounwind {
; CHECK-LABEL: define {{[^@]+}}@unions() ; CHECK-LABEL: define {{[^@]+}}@unions()
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: call void @vfu1(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) ; CHECK-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) ; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 1
; CHECK-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 1
; CHECK-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
; CHECK-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]])
; CHECK-NEXT: ret i32 [[RESULT]] ; CHECK-NEXT: ret i32 [[RESULT]]
; ;
entry: entry:
@ -64,17 +82,22 @@ entry:
define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly { define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly {
; CHECK-LABEL: define {{[^@]+}}@vfu2_v2 ; CHECK-LABEL: define {{[^@]+}}@vfu2_v2
; CHECK-SAME: (%struct.MYstr* noalias nocapture nofree nonnull byval align 8 dereferenceable(8) [[U:%.*]]) ; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[Z:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1 ; CHECK-NEXT: [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
; CHECK-NEXT: [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
; CHECK-NEXT: store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
; CHECK-NEXT: [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
; CHECK-NEXT: [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
; CHECK-NEXT: store i32 99, i32* [[Z]], align 4 ; CHECK-NEXT: store i32 99, i32* [[Z]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
; CHECK-NEXT: ret i32 [[TMP5]] ; CHECK-NEXT: ret i32 [[TMP7]]
; ;
entry: entry:
%z = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1 %z = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1
@ -91,8 +114,16 @@ entry:
define i32 @unions_v2() nounwind { define i32 @unions_v2() nounwind {
; CHECK-LABEL: define {{[^@]+}}@unions_v2() ; CHECK-LABEL: define {{[^@]+}}@unions_v2()
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: call void @vfu1(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) ; CHECK-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) ; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 1
; CHECK-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 1
; CHECK-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
; CHECK-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]])
; CHECK-NEXT: ret i32 [[RESULT]] ; CHECK-NEXT: ret i32 [[RESULT]]
; ;
entry: entry:

View File

@ -9,7 +9,7 @@ define i64 @fn2() {
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sext i32 undef to i64 ; CHECK-NEXT: [[CONV:%.*]] = sext i32 undef to i64
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] ; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]]
; CHECK-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) ; CHECK-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #0, !range !0
; CHECK-NEXT: ret i64 [[CALL2]] ; CHECK-NEXT: ret i64 [[CALL2]]
; ;
entry: entry:

View File

@ -63,5 +63,112 @@ declare void @t0_check(i32* align 256, i64, i32*)
declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...) declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...)
; Test 1
;
; Similar to test 0 but with some additional annotations (noalias/nocapute) to make sure
; we deduce and propagate noalias and others properly.
define void @t1_caller(i32* noalias %a) {
; CHECK-LABEL: define {{[^@]+}}@t1_caller
; CHECK-SAME: (i32* noalias nocapture align 256 [[A:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32
; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64
; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; CHECK-NEXT: store i32 42, i32* [[B]], align 32
; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
; CHECK-NEXT: ret void
;
entry:
%b = alloca i32, align 32
%c = alloca i32*, align 64
%ptr = alloca i32, align 128
%0 = bitcast i32* %b to i8*
store i32 42, i32* %b, align 4
store i32* %b, i32** %c, align 8
call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
ret void
}
; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee
; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; CHECK-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
%ptr_val = load i32, i32* %ptr, align 8
store i32 %ptr_val, i32* %is_not_null
%0 = load i32*, i32** %c, align 8
tail call void @t1_check(i32* %a, i64 %b, i32* %0)
ret void
}
declare void @t1_check(i32* nocapture align 256, i64, i32* nocapture) nosync
declare !callback !0 void @t1_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
; Test 2
;
; Similar to test 1 but checking that the noalias is only placed if potential synchronization through @t2_check is preserved.
define void @t2_caller(i32* noalias %a) {
; CHECK-LABEL: define {{[^@]+}}@t2_caller
; CHECK-SAME: (i32* noalias nocapture align 256 [[A:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32
; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64
; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; CHECK-NEXT: store i32 42, i32* [[B]], align 32
; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
; CHECK-NEXT: ret void
;
entry:
%b = alloca i32, align 32
%c = alloca i32*, align 64
%ptr = alloca i32, align 128
%0 = bitcast i32* %b to i8*
store i32 42, i32* %b, align 4
store i32* %b, i32** %c, align 8
call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
ret void
}
; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
;
; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls.
define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee
; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; CHECK-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
%ptr_val = load i32, i32* %ptr, align 8
store i32 %ptr_val, i32* %is_not_null
%0 = load i32*, i32** %c, align 8
tail call void @t2_check(i32* %a, i64 %b, i32* %0)
ret void
}
declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture)
declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
!0 = !{!1} !0 = !{!1}
!1 = !{i64 2, i64 -1, i64 -1, i1 true} !1 = !{i64 2, i64 -1, i64 -1, i1 true}

View File

@ -40,7 +40,7 @@ entry:
ret i32 %add ret i32 %add
} }
; CHECK: define internal i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) %A, i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) %B) ; CHECK: define internal i32 @noalias_args_argmem_ro(i32 %0, i32 %1)
define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 {
%t0 = load i32, i32* %A, align 4 %t0 = load i32, i32* %A, align 4
%t1 = load i32, i32* %B, align 4 %t1 = load i32, i32* %B, align 4