[LV] Move code from vectorizeMemoryInstruction to recipe's execute().
The code in widenMemoryInstruction has already been transitioned to only rely
on information provided by VPWidenMemoryInstructionRecipe directly.

Moving the code directly to VPWidenMemoryInstructionRecipe::execute completes
the transition for the recipe. It provides the following advantages:

1. Less indirection, easier to see what's going on.
2. Removes accesses to fields of ILV.

2) in particular ensures that no dependencies on fields in ILV for vector code
generation are re-introduced.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D114324
This commit is contained in:
parent 003c9c7457
commit e44298a8f8
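The change described above follows the general pattern of moving code generation out of an InnerLoopVectorizer helper, reached through State.ILV, into the recipe's own execute(). Below is a minimal, self-contained sketch of that pattern; Helper, State, and WidenFooRecipe are hypothetical stand-ins chosen for illustration, not the actual LLVM classes.

#include <iostream>

// Hypothetical, simplified stand-ins for VPTransformState and
// InnerLoopVectorizer; not the real LLVM classes.
struct State;

struct Helper {
  // Before: the recipe forwarded its operands to a helper on the vectorizer
  // object, so code generation depended on Helper and its fields.
  void widenMemory(State &St, int Addr, bool Reverse);
};

struct State {
  Helper *ILV = nullptr; // back-pointer the recipe used to go through
  unsigned UF = 2;       // unroll factor, read directly by the recipe below
};

void Helper::widenMemory(State &St, int Addr, bool Reverse) {
  for (unsigned Part = 0; Part < St.UF; ++Part)
    std::cout << "helper: part " << Part << " addr " << Addr
              << " reverse " << Reverse << "\n";
}

// Before the change: execute() was a thin wrapper around the helper.
struct WidenFooRecipeBefore {
  int Addr = 40;
  bool Reverse = false;
  void execute(State &St) { St.ILV->widenMemory(St, Addr, Reverse); }
};

// After the change: the body lives in the recipe and reads only recipe
// fields and State, removing the indirection and the ILV dependency.
struct WidenFooRecipeAfter {
  int Addr = 40;
  bool Reverse = false;
  void execute(State &St) {
    for (unsigned Part = 0; Part < St.UF; ++Part)
      std::cout << "recipe: part " << Part << " addr " << Addr
                << " reverse " << Reverse << "\n";
  }
};

int main() {
  Helper ILV;
  State St;
  St.ILV = &ILV;
  WidenFooRecipeBefore Before;
  Before.execute(St);
  WidenFooRecipeAfter After;
  After.execute(St);
  return 0;
}

Both recipes produce the same output here; the point is only where the loop over parts lives and which object it depends on.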
@@ -524,15 +524,6 @@ public:
                                 ArrayRef<VPValue *> StoredValues,
                                 VPValue *BlockInMask = nullptr);
 
-  /// Vectorize Load and Store instructions with the base address given in \p
-  /// Addr, optionally masking the vector operations if \p BlockInMask is
-  /// non-null. Use \p State to translate given VPValues to IR values in the
-  /// vectorized loop.
-  void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
-                                  VPValue *Def, VPValue *Addr,
-                                  VPValue *StoredValue, VPValue *BlockInMask,
-                                  bool ConsecutiveStride, bool Reverse);
-
   /// Set the debug location in the builder \p Ptr using the debug location in
   /// \p V. If \p Ptr is None then it uses the class member's Builder.
   void setDebugLocFromInst(const Value *V,
@@ -3004,130 +2995,6 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
     }
   }
 
-void InnerLoopVectorizer::vectorizeMemoryInstruction(
-    Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr,
-    VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride,
-    bool Reverse) {
-  // Attempt to issue a wide load.
-  LoadInst *LI = dyn_cast<LoadInst>(Instr);
-  StoreInst *SI = dyn_cast<StoreInst>(Instr);
-
-  assert((LI || SI) && "Invalid Load/Store instruction");
-  assert((!SI || StoredValue) && "No stored value provided for widened store");
-  assert((!LI || !StoredValue) && "Stored value provided for widened load");
-
-  Type *ScalarDataTy = getLoadStoreType(Instr);
-
-  auto *DataTy = VectorType::get(ScalarDataTy, VF);
-  const Align Alignment = getLoadStoreAlignment(Instr);
-  bool CreateGatherScatter = !ConsecutiveStride;
-
-  VectorParts BlockInMaskParts(UF);
-  bool isMaskRequired = BlockInMask;
-  if (isMaskRequired)
-    for (unsigned Part = 0; Part < UF; ++Part)
-      BlockInMaskParts[Part] = State.get(BlockInMask, Part);
-
-  const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
-    // Calculate the pointer for the specific unroll-part.
-    GetElementPtrInst *PartPtr = nullptr;
-
-    bool InBounds = false;
-    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
-      InBounds = gep->isInBounds();
-    if (Reverse) {
-      // If the address is consecutive but reversed, then the
-      // wide store needs to start at the last vector element.
-      // RunTimeVF = VScale * VF.getKnownMinValue()
-      // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
-      Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), VF);
-      // NumElt = -Part * RunTimeVF
-      Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
-      // LastLane = 1 - RunTimeVF
-      Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
-      PartPtr =
-          cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
-      PartPtr->setIsInBounds(InBounds);
-      PartPtr = cast<GetElementPtrInst>(
-          Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
-      PartPtr->setIsInBounds(InBounds);
-      if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
-        BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
-    } else {
-      Value *Increment =
-          createStepForVF(Builder, Builder.getInt32Ty(), VF, Part);
-      PartPtr = cast<GetElementPtrInst>(
-          Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
-      PartPtr->setIsInBounds(InBounds);
-    }
-
-    unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
-    return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
-  };
-
-  // Handle Stores:
-  if (SI) {
-    setDebugLocFromInst(SI);
-
-    for (unsigned Part = 0; Part < UF; ++Part) {
-      Instruction *NewSI = nullptr;
-      Value *StoredVal = State.get(StoredValue, Part);
-      if (CreateGatherScatter) {
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        Value *VectorGep = State.get(Addr, Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
-      } else {
-        if (Reverse) {
-          // If we store to reverse consecutive memory locations, then we need
-          // to reverse the order of elements in the stored value.
-          StoredVal = reverseVector(StoredVal);
-          // We don't want to update the value in the map as it might be used in
-          // another expression. So don't call resetVectorValue(StoredVal).
-        }
-        auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
-        if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            BlockInMaskParts[Part]);
-        else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
-      }
-      addMetadata(NewSI, SI);
-    }
-    return;
-  }
-
-  // Handle loads.
-  assert(LI && "Must have a load instruction");
-  setDebugLocFromInst(LI);
-  for (unsigned Part = 0; Part < UF; ++Part) {
-    Value *NewLI;
-    if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-      Value *VectorGep = State.get(Addr, Part);
-      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
-                                         nullptr, "wide.masked.gather");
-      addMetadata(NewLI, LI);
-    } else {
-      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
-      if (isMaskRequired)
-        NewLI = Builder.CreateMaskedLoad(
-            DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
-            PoisonValue::get(DataTy), "wide.masked.load");
-      else
-        NewLI =
-            Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
-
-      // Add metadata to the load, but setVectorValue to the reverse shuffle.
-      addMetadata(NewLI, LI);
-      if (Reverse)
-        NewLI = reverseVector(NewLI);
-    }
-
-    State.set(Def, NewLI, Part);
-  }
-}
-
 void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
                                                VPReplicateRecipe *RepRecipe,
                                                const VPIteration &Instance,
@@ -10057,9 +9924,129 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
 
 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
   VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-  State.ILV->vectorizeMemoryInstruction(
-      &Ingredient, State, StoredValue ? nullptr : getVPSingleValue(), getAddr(),
-      StoredValue, getMask(), Consecutive, Reverse);
+
+  // Attempt to issue a wide load.
+  LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
+  StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
+
+  assert((LI || SI) && "Invalid Load/Store instruction");
+  assert((!SI || StoredValue) && "No stored value provided for widened store");
+  assert((!LI || !StoredValue) && "Stored value provided for widened load");
+
+  Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+
+  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+  bool CreateGatherScatter = !Consecutive;
+
+  auto &Builder = State.Builder;
+  InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+  bool isMaskRequired = getMask();
+  if (isMaskRequired)
+    for (unsigned Part = 0; Part < State.UF; ++Part)
+      BlockInMaskParts[Part] = State.get(getMask(), Part);
+
+  const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
+    // Calculate the pointer for the specific unroll-part.
+    GetElementPtrInst *PartPtr = nullptr;
+
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+    if (Reverse) {
+      // If the address is consecutive but reversed, then the
+      // wide store needs to start at the last vector element.
+      // RunTimeVF = VScale * VF.getKnownMinValue()
+      // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+      Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
+      // NumElt = -Part * RunTimeVF
+      Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
+      // LastLane = 1 - RunTimeVF
+      Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
+      PartPtr =
+          cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
+      PartPtr->setIsInBounds(InBounds);
+      PartPtr = cast<GetElementPtrInst>(
+          Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
+      PartPtr->setIsInBounds(InBounds);
+      if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
+        BlockInMaskParts[Part] =
+            Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
+    } else {
+      Value *Increment =
+          createStepForVF(Builder, Builder.getInt32Ty(), State.VF, Part);
+      PartPtr = cast<GetElementPtrInst>(
+          Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
+      PartPtr->setIsInBounds(InBounds);
+    }
+
+    unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
+    return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
+  };
+
+  // Handle Stores:
+  if (SI) {
+    State.ILV->setDebugLocFromInst(SI);
+
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      Instruction *NewSI = nullptr;
+      Value *StoredVal = State.get(StoredValue, Part);
+      if (CreateGatherScatter) {
+        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+        Value *VectorGep = State.get(getAddr(), Part);
+        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+                                            MaskPart);
+      } else {
+        if (Reverse) {
+          // If we store to reverse consecutive memory locations, then we need
+          // to reverse the order of elements in the stored value.
+          StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+          // We don't want to update the value in the map as it might be used in
+          // another expression. So don't call resetVectorValue(StoredVal).
+        }
+        auto *VecPtr =
+            CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+        if (isMaskRequired)
+          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+                                            BlockInMaskParts[Part]);
+        else
+          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+      }
+      State.ILV->addMetadata(NewSI, SI);
+    }
+    return;
+  }
+
+  // Handle loads.
+  assert(LI && "Must have a load instruction");
+  State.ILV->setDebugLocFromInst(LI);
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Value *NewLI;
+    if (CreateGatherScatter) {
+      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      Value *VectorGep = State.get(getAddr(), Part);
+      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+                                         nullptr, "wide.masked.gather");
+      State.ILV->addMetadata(NewLI, LI);
+    } else {
+      auto *VecPtr =
+          CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+      if (isMaskRequired)
+        NewLI = Builder.CreateMaskedLoad(
+            DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
+            PoisonValue::get(DataTy), "wide.masked.load");
+      else
+        NewLI =
+            Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
+
+      // Add metadata to the load, but setVectorValue to the reverse shuffle.
+      State.ILV->addMetadata(NewLI, LI);
+      if (Reverse)
+        NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
+    }
+
+    State.set(getVPSingleValue(), NewLI, Part);
+  }
 }
 
 // Determine how to lower the scalar epilogue, which depends on 1) optimising
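One part of the moved code that may deserve a worked example is the reversed-consecutive pointer computation in CreateVecPtr: NumElt = -Part * RunTimeVF and LastLane = 1 - RunTimeVF, applied as two consecutive GEPs, place each part's pointer at the lowest-addressed lane of that part's reversed vector. Below is a small standalone sketch of the resulting offsets, using plain integers instead of IRBuilder calls and hypothetical illustrative values (fixed-width VF = 4, vscale = 1, UF = 2).

#include <cstdio>

int main() {
  // Hypothetical fixed-width example mirroring the comments in CreateVecPtr:
  // RunTimeVF = VScale * VF.getKnownMinValue(), with VScale = 1 and VF = 4.
  const int RunTimeVF = 4;
  const int UF = 2; // unroll factor assumed for illustration
  for (int Part = 0; Part < UF; ++Part) {
    int NumElt = -Part * RunTimeVF; // offset of this part's base element
    int LastLane = 1 - RunTimeVF;   // step back to the lowest-addressed lane
    // The two consecutive GEPs yield Ptr + NumElt + LastLane, so the wide
    // access for this part covers elements [NumElt + LastLane, NumElt].
    std::printf("Part %d: offsets %d then %d -> elements [%d, %d]\n",
                Part, NumElt, LastLane, NumElt + LastLane, NumElt);
  }
  return 0;
}

With these values the two parts cover elements [-3, 0] and [-7, -4] relative to the scalar pointer, matching a scalar loop that walks the addresses downwards; the vector values themselves are then reversed separately.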