[Alignment] Remove unnecessary getValueOrABITypeAlignment calls (NFC)
Now that load/store alignment is required, we no longer need most of them. Also switch the getLoadStoreAlignment() helper to return Align instead of MaybeAlign.
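To illustrate the pattern this change removes (a minimal sketch against the same LLVM C++ API shown in the diff below; the helper names loadAlignBefore/loadAlignAfter are made up for illustration and are not in the tree):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    // Before: a load's alignment could be unset, so call sites wrapped it in
    // MaybeAlign and fell back to the ABI alignment of the loaded type.
    static Align loadAlignBefore(const DataLayout &DL, LoadInst *LI) {
      return DL.getValueOrABITypeAlignment(MaybeAlign(LI->getAlignment()),
                                           LI->getType());
    }

    // After: alignment is always present on loads and stores, so reading it
    // back directly is enough and the DataLayout fallback is dead code.
    static Align loadAlignAfter(LoadInst *LI) { return LI->getAlign(); }

The same collapse applies to stores, which is why getLoadStoreAlignment() can now return Align rather than MaybeAlign.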
parent fde8eb00e1
commit 52e98f620c
@@ -5151,12 +5151,12 @@ inline Value *getPointerOperand(Value *V) {
 }
 
 /// A helper function that returns the alignment of load or store instruction.
-inline MaybeAlign getLoadStoreAlignment(Value *I) {
+inline Align getLoadStoreAlignment(Value *I) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
          "Expected Load or Store instruction");
   if (auto *LI = dyn_cast<LoadInst>(I))
-    return MaybeAlign(LI->getAlignment());
-  return MaybeAlign(cast<StoreInst>(I)->getAlignment());
+    return LI->getAlign();
+  return cast<StoreInst>(I)->getAlign();
 }
 
 /// A helper function that returns the address space of the pointer operand of
@@ -210,8 +210,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
 
   APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
                 DL.getTypeStoreSize(LI->getType()));
-  const Align Alignment = DL.getValueOrABITypeAlignment(
-      MaybeAlign(LI->getAlignment()), LI->getType());
+  const Align Alignment = LI->getAlign();
 
   Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
 
@@ -946,13 +946,8 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
       const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
       PointerType *PtrTy = cast<PointerType>(Ptr->getType());
       uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
-      // An alignment of 0 means target ABI alignment.
-      MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I));
-      if (!Alignment)
-        Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType()));
-
-      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment);
+      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
+                                              getLoadStoreAlignment(&I));
     }
   }
 
@@ -247,7 +247,7 @@ Align IRTranslator::getMemOpAlign(const Instruction &I) {
   if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
     return SI->getAlign();
   if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
-    return DL->getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
+    return LI->getAlign();
   }
   if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
     // TODO(PR27168): This instruction has no alignment attribute, but unlike
@@ -3956,7 +3956,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   SDValue Ptr = getValue(SV);
 
   Type *Ty = I.getType();
-  Align Alignment = DL->getValueOrABITypeAlignment(I.getAlign(), Ty);
+  Align Alignment = I.getAlign();
 
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
@@ -4149,8 +4149,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
   SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
   SDLoc dl = getCurSDLoc();
-  Align Alignment =
-      DL->getValueOrABITypeAlignment(I.getAlign(), SrcV->getType());
+  Align Alignment = I.getAlign();
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
 
@@ -3930,14 +3930,12 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   const X86InstrInfo &XII = (const X86InstrInfo &)TII;
 
   unsigned Size = DL.getTypeAllocSize(LI->getType());
-  Align Alignment =
-      DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
 
   SmallVector<MachineOperand, 8> AddrOps;
   AM.getFullAddress(AddrOps);
 
   MachineInstr *Result = XII.foldMemoryOperandImpl(
-      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
       /*AllowCommute=*/true);
   if (!Result)
     return false;
@@ -1441,17 +1441,14 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
   if (Size == 0)
     return;
 
-  const Align Alignement =
-      ClPreserveAlignment ? DL.getValueOrABITypeAlignment(
-                                SI.getAlign(), SI.getValueOperand()->getType())
-                          : Align(1);
+  const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1);
 
   Value* Shadow = DFSF.getShadow(SI.getValueOperand());
   if (ClCombinePointerLabelsOnStore) {
     Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
     Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
   }
-  DFSF.storeShadow(SI.getPointerOperand(), Size, Alignement, Shadow, &SI);
+  DFSF.storeShadow(SI.getPointerOperand(), Size, Alignment, Shadow, &SI);
   if (ClEventCallbacks) {
     IRBuilder<> IRB(&SI);
     IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, Shadow);
@@ -320,24 +320,19 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
         WorkList.push_back(K);
   }
 
-  const DataLayout &DL = SE->getDataLayout();
   while (!WorkList.empty()) {
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
       Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
                                            LI->getPointerOperand(), SE);
-      Align OldAlignment =
-          DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-      if (NewAlignment > OldAlignment) {
+      if (NewAlignment > LI->getAlign()) {
         LI->setAlignment(NewAlignment);
         ++NumLoadAlignChanged;
       }
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
       Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
                                            SI->getPointerOperand(), SE);
-      Align OldAlignment = DL.getValueOrABITypeAlignment(
-          SI->getAlign(), SI->getOperand(0)->getType());
-      if (NewAlignment > OldAlignment) {
+      if (NewAlignment > SI->getAlign()) {
         SI->setAlignment(NewAlignment);
         ++NumStoreAlignChanged;
       }
@@ -143,23 +143,6 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
   return TheStores.size() > NumPointerStores+NumByteStores;
 }
 
-
-static Align findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
-  return DL.getValueOrABITypeAlignment(SI->getAlign(),
-                                       SI->getOperand(0)->getType());
-}
-
-static Align findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
-  return DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-}
-
-static Align findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
-                                 const LoadInst *LI) {
-  Align StoreAlign = findStoreAlignment(DL, SI);
-  Align LoadAlign = findLoadAlignment(DL, LI);
-  return commonAlignment(StoreAlign, LoadAlign);
-}
-
 namespace {
 
 class MemsetRanges {
@@ -190,7 +173,7 @@ public:
     int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
 
     addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
-             findStoreAlignment(DL, SI).value(), SI);
+             SI->getAlign().value(), SI);
   }
 
   void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
@@ -579,12 +562,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
         Instruction *M;
         if (UseMemMove)
           M = Builder.CreateMemMove(
-              SI->getPointerOperand(), findStoreAlignment(DL, SI),
-              LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
+              SI->getPointerOperand(), SI->getAlign(),
+              LI->getPointerOperand(), LI->getAlign(), Size);
         else
           M = Builder.CreateMemCpy(
-              SI->getPointerOperand(), findStoreAlignment(DL, SI),
-              LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
+              SI->getPointerOperand(), SI->getAlign(),
+              LI->getPointerOperand(), LI->getAlign(), Size);
 
         LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
                           << *M << "\n");
@@ -636,7 +619,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
             LI, SI->getPointerOperand()->stripPointerCasts(),
             LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
-            findCommonAlignment(DL, SI, LI), C);
+            commonAlignment(SI->getAlign(), LI->getAlign()), C);
         if (changed) {
           MD->removeInstruction(SI);
           SI->eraseFromParent();
@@ -669,11 +652,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     auto *T = V->getType();
     if (T->isAggregateType()) {
       uint64_t Size = DL.getTypeStoreSize(T);
-      const Align MA =
-          DL.getValueOrABITypeAlignment(MaybeAlign(SI->getAlignment()), T);
       IRBuilder<> Builder(SI);
-      auto *M =
-          Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, MA);
+      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
+                                     SI->getAlign());
 
       LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
 
@@ -1267,7 +1267,6 @@ static void speculatePHINodeLoads(PHINode &PN) {
 
   LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
   Type *LoadTy = SomeLoad->getType();
-  const DataLayout &DL = PN.getModule()->getDataLayout();
   IRBuilderTy PHIBuilder(&PN);
   PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                         PN.getName() + ".sroa.speculated");
@@ -1276,8 +1275,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
   // matter which one we get and if any differ.
   AAMDNodes AATags;
   SomeLoad->getAAMetadata(AATags);
-  Align Alignment =
-      DL.getValueOrABITypeAlignment(SomeLoad->getAlign(), SomeLoad->getType());
+  Align Alignment = SomeLoad->getAlign();
 
   // Rewrite all loads of the PN to use the new PHI.
   while (!PN.use_empty()) {
@@ -1304,11 +1302,10 @@ static void speculatePHINodeLoads(PHINode &PN) {
     Instruction *TI = Pred->getTerminator();
     IRBuilderTy PredBuilder(TI);
 
-    LoadInst *Load = PredBuilder.CreateLoad(
-        LoadTy, InVal,
+    LoadInst *Load = PredBuilder.CreateAlignedLoad(
+        LoadTy, InVal, Alignment,
         (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
     ++NumLoadsSpeculated;
-    Load->setAlignment(Alignment);
     if (AATags)
       Load->setAAMetadata(AATags);
     NewPN->addIncoming(Load, Pred);
@@ -1688,20 +1685,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
 }
 
 /// Compute the adjusted alignment for a load or store from an offset.
-static Align getAdjustedAlignment(Instruction *I, uint64_t Offset,
-                                  const DataLayout &DL) {
-  MaybeAlign Alignment;
-  Type *Ty;
-  if (auto *LI = dyn_cast<LoadInst>(I)) {
-    Alignment = MaybeAlign(LI->getAlignment());
-    Ty = LI->getType();
-  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
-    Alignment = MaybeAlign(SI->getAlignment());
-    Ty = SI->getValueOperand()->getType();
-  } else {
-    llvm_unreachable("Only loads and stores are allowed!");
-  }
-  return commonAlignment(DL.getValueOrABITypeAlignment(Alignment, Ty), Offset);
+static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
+  return commonAlignment(getLoadStoreAlignment(I), Offset);
 }
 
 /// Test whether we can convert a value from the old to the new type.
@@ -2448,9 +2433,8 @@ private:
   /// You can optionally pass a type to this routine and if that type's ABI
   /// alignment is itself suitable, this will return zero.
   Align getSliceAlign() {
-    Align NewAIAlign = DL.getValueOrABITypeAlignment(
-        MaybeAlign(NewAI.getAlignment()), NewAI.getAllocatedType());
-    return commonAlignment(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
+    return commonAlignment(NewAI.getAlign(),
+                           NewBeginOffset - NewAllocaBeginOffset);
   }
 
   unsigned getIndex(uint64_t Offset) {
@@ -3139,17 +3123,12 @@ private:
       Instruction *I = Uses.pop_back_val();
 
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        Align LoadAlign =
-            DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-        LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+        LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
        continue;
      }
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        Value *Op = SI->getOperand(0);
-        Align StoreAlign = DL.getValueOrABITypeAlignment(
-            MaybeAlign(SI->getAlignment()), Op->getType());
-        SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
        continue;
      }
 
      assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
@@ -3399,7 +3378,7 @@ private:
     AAMDNodes AATags;
     LI.getAAMetadata(AATags);
     LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
-                            getAdjustedAlignment(&LI, 0, DL), DL);
+                            getAdjustedAlignment(&LI, 0), DL);
     Value *V = UndefValue::get(LI.getType());
     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
     LI.replaceAllUsesWith(V);
@@ -3446,7 +3425,7 @@ private:
     AAMDNodes AATags;
     SI.getAAMetadata(AATags);
     StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
-                             getAdjustedAlignment(&SI, 0, DL), DL);
+                             getAdjustedAlignment(&SI, 0), DL);
     Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
     SI.eraseFromParent();
     return true;
@@ -3895,7 +3874,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
         getAdjustedPtr(IRB, DL, BasePtr,
                        APInt(DL.getIndexSizeInBits(AS), PartOffset),
                        PartPtrTy, BasePtr->getName() + "."),
-        getAdjustedAlignment(LI, PartOffset, DL),
+        getAdjustedAlignment(LI, PartOffset),
        /*IsVolatile*/ false, LI->getName());
     PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
@@ -3953,7 +3932,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
          getAdjustedPtr(IRB, DL, StoreBasePtr,
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         PartPtrTy, StoreBasePtr->getName() + "."),
-          getAdjustedAlignment(SI, PartOffset, DL),
+          getAdjustedAlignment(SI, PartOffset),
          /*IsVolatile*/ false);
      PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});
@@ -4038,7 +4017,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
          getAdjustedPtr(IRB, DL, LoadBasePtr,
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         LoadPartPtrTy, LoadBasePtr->getName() + "."),
-          getAdjustedAlignment(LI, PartOffset, DL),
+          getAdjustedAlignment(LI, PartOffset),
          /*IsVolatile*/ false, LI->getName());
    }
 
@@ -4050,7 +4029,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
        getAdjustedPtr(IRB, DL, StoreBasePtr,
                       APInt(DL.getIndexSizeInBits(AS), PartOffset),
                       StorePartPtrTy, StoreBasePtr->getName() + "."),
-        getAdjustedAlignment(SI, PartOffset, DL),
+        getAdjustedAlignment(SI, PartOffset),
        /*IsVolatile*/ false);
 
    // Now build a new slice for the alloca.
@@ -4186,13 +4165,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
     // FIXME: We might want to defer PHI speculation until after here.
     // FIXME: return nullptr;
   } else {
-    // If alignment is unspecified we fallback on the one required by the ABI
-    // for this type. We also make sure the alignment is compatible with
-    // P.beginOffset().
-    const Align Alignment = commonAlignment(
-        DL.getValueOrABITypeAlignment(MaybeAlign(AI.getAlignment()),
-                                      AI.getAllocatedType()),
-        P.beginOffset());
+    // Make sure the alignment is compatible with P.beginOffset().
+    const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
     // If we will get at least this much alignment from the type alone, leave
     // the alloca's alignment unconstrained.
     const bool IsUnconstrained = Alignment <= DL.getABITypeAlignment(SliceTy);
@@ -128,15 +128,6 @@ public:
 private:
   unsigned getPointerAddressSpace(Value *I);
 
-  Align getAlign(LoadInst *LI) const {
-    return DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-  }
-
-  Align getAlign(StoreInst *SI) const {
-    return DL.getValueOrABITypeAlignment(SI->getAlign(),
-                                         SI->getValueOperand()->getType());
-  }
-
  static const unsigned MaxDepth = 3;
 
  bool isConsecutiveAccess(Value *A, Value *B);
@@ -950,7 +941,7 @@ bool Vectorizer::vectorizeStoreChain(
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
-  Align Alignment = getAlign(S0);
+  Align Alignment = S0->getAlign();
 
  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
@@ -1103,7 +1094,7 @@ bool Vectorizer::vectorizeLoadChain(
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
-  Align Alignment = getAlign(L0);
+  Align Alignment = L0->getAlign();
 
  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
@@ -769,9 +769,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
          // Arbitrarily try a vector of 2 elements.
          Type *VecTy = VectorType::get(T, /*NumElements=*/2);
          assert(VecTy && "did not find vectorized version of stored type");
-          const MaybeAlign Alignment = getLoadStoreAlignment(ST);
-          assert(Alignment && "Alignment should be set");
-          if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
+          if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
            reportVectorizationFailure(
                "nontemporal store instruction cannot be vectorized",
                "nontemporal store instruction cannot be vectorized",
@@ -786,9 +784,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
          // supported on the target (arbitrarily try a vector of 2 elements).
          Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
          assert(VecTy && "did not find vectorized version of load type");
-          const MaybeAlign Alignment = getLoadStoreAlignment(LD);
-          assert(Alignment && "Alignment should be set");
-          if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
+          if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
            reportVectorizationFailure(
                "nontemporal load instruction cannot be vectorized",
                "nontemporal load instruction cannot be vectorized",
@@ -1233,7 +1233,7 @@ public:
    if (!LI && !SI)
      return false;
    auto *Ty = getMemInstValueType(V);
-    MaybeAlign Align = getLoadStoreAlignment(V);
+    Align Align = getLoadStoreAlignment(V);
    return (LI && isLegalMaskedGather(Ty, Align)) ||
           (SI && isLegalMaskedScatter(Ty, Align));
  }
@@ -2383,11 +2383,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
 
  Type *ScalarDataTy = getMemInstValueType(Instr);
  Type *DataTy = VectorType::get(ScalarDataTy, VF);
-  // An alignment of 0 means target abi alignment. We need to use the scalar's
-  // target abi alignment in such a case.
-  const DataLayout &DL = Instr->getModule()->getDataLayout();
-  const Align Alignment =
-      DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
+  const Align Alignment = getLoadStoreAlignment(Instr);
 
  // Determine if the pointer operand of the access is either consecutive or
  // reverse consecutive.
@@ -4650,7 +4646,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
             "Widening decision should be ready at this moment");
      return WideningDecision == CM_Scalarize;
    }
-    const MaybeAlign Alignment = getLoadStoreAlignment(I);
+    const Align Alignment = getLoadStoreAlignment(I);
    return isa<LoadInst>(I) ? !(isLegalMaskedLoad(Ty, Ptr, Alignment) ||
                                isLegalMaskedGather(Ty, Alignment))
                            : !(isLegalMaskedStore(Ty, Ptr, Alignment) ||
@@ -4697,7 +4693,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
         "Masked interleave-groups for predicated accesses are not enabled.");
 
  auto *Ty = getMemInstValueType(I);
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)
                          : TTI.isLegalMaskedStore(Ty, Alignment);
 }
@@ -5845,7 +5841,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
  // Don't pass *I here, since it is scalar but will actually be part of a
  // vectorized loop where the user of it is a vectorized instruction.
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                   Alignment, AS,
                                   TTI::TCK_RecipThroughput);
@@ -5880,12 +5876,11 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
 
  assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
         "Stride should be 1 or -1 for consecutive memory access");
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  unsigned Cost = 0;
  if (Legal->isMaskRequired(I))
    Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
-                                      Alignment ? Alignment->value() : 0, AS,
-                                      CostKind);
+                                      Alignment.value(), AS, CostKind);
  else
    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                CostKind, I);
@@ -5900,7 +5895,7 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
                                                           unsigned VF) {
  Type *ValTy = getMemInstValueType(I);
  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  unsigned AS = getLoadStoreAddressSpace(I);
  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  if (isa<LoadInst>(I)) {
@@ -5925,13 +5920,12 @@ unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
                                                            unsigned VF) {
  Type *ValTy = getMemInstValueType(I);
  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  Value *Ptr = getLoadStorePointerOperand(I);
 
  return TTI.getAddressComputationCost(VectorTy) +
         TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I),
-                                    Alignment ? Alignment->value() : 0,
+                                    Legal->isMaskRequired(I), Alignment.value(),
                                    TargetTransformInfo::TCK_RecipThroughput,
                                    I);
 }
@@ -5981,7 +5975,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
  // moment.
  if (VF == 1) {
    Type *ValTy = getMemInstValueType(I);
-    const MaybeAlign Alignment = getLoadStoreAlignment(I);
+    const Align Alignment = getLoadStoreAlignment(I);
    unsigned AS = getLoadStoreAddressSpace(I);
 
    return TTI.getAddressComputationCost(ValTy) +
@@ -4401,7 +4401,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      setInsertPointAfterBundle(E);
 
      LoadInst *LI = cast<LoadInst>(VL0);
-      Type *ScalarLoadTy = LI->getType();
      unsigned AS = LI->getPointerAddressSpace();
 
      Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
@@ -4414,9 +4413,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      if (getTreeEntry(PO))
        ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
 
-      Align Alignment = DL->getValueOrABITypeAlignment(LI->getAlign(),
-                                                       ScalarLoadTy);
-      LI = Builder.CreateAlignedLoad(VecTy, VecPtr, Alignment);
+      LI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
      Value *V = propagateMetadata(LI, E->Scalars);
      if (IsReorder) {
        SmallVector<int, 4> Mask;
@@ -4437,7 +4434,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      bool IsReorder = !E->ReorderIndices.empty();
      auto *SI = cast<StoreInst>(
          IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
-      unsigned Alignment = SI->getAlignment();
      unsigned AS = SI->getPointerAddressSpace();
 
      setInsertPointAfterBundle(E);
@@ -4453,7 +4449,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      Value *ScalarPtr = SI->getPointerOperand();
      Value *VecPtr = Builder.CreateBitCast(
          ScalarPtr, VecValue->getType()->getPointerTo(AS));
-      StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
+      StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
+                                                 SI->getAlign());
 
      // The pointer operand uses an in-tree scalar, so add the new BitCast to
      // ExternalUses to make sure that an extract will be generated in the
@@ -4461,10 +4458,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      if (getTreeEntry(ScalarPtr))
        ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
 
-      if (!Alignment)
-        Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
-
-      ST->setAlignment(Align(Alignment));
      Value *V = propagateMetadata(ST, E->Scalars);
      if (NeedToShuffleReuses) {
        V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),