[CodeGen] In narrowExtractedVectorLoad bail out for scalable vectors

In narrowExtractedVectorLoad there is an optimisation that tries to
combine extract_subvector with a narrowing vector load. At the moment
this produces warnings due to the incorrect calls to
getVectorNumElements() for scalable vector types. I've got this
working for scalable vectors too when the extract subvector index
is a multiple of the minimum number of elements. I have added a
new variant of the function:

  MachineFunction::getMachineMemOperand

that copies an existing MachineMemOperand, but replaces the pointer
info with a null version since we cannot currently represent scaled
offsets.

I've added a new test for this particular case in:

  CodeGen/AArch64/sve-extract-subvector.ll

Differential Revision: https://reviews.llvm.org/D83950
This commit is contained in:
David Sherwood 2020-07-14 13:50:21 +01:00
parent 3948341fa5
commit 3ec3fcb97a
4 changed files with 47 additions and 17 deletions

View File

@ -815,6 +815,14 @@ public:
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size);
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, replacing only the MachinePointerInfo and size.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
MachinePointerInfo &PtrInfo,
uint64_t Size);
/// Allocate a new MachineMemOperand by copying an existing one,
/// replacing only AliasAnalysis information. MachineMemOperands are owned
/// by the MachineFunction and need not be explicitly deallocated.

View File

@ -474,6 +474,13 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
SSID, Ordering, FailureOrdering);
}
MachineMemOperand *MachineFunction::getMachineMemOperand(
const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) {
return new (Allocator) MachineMemOperand(
PtrInfo, MMO->getFlags(), Size, Alignment, AAMDNodes(), nullptr,
MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering());
}
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {

View File

@ -19338,19 +19338,15 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
unsigned Index = ExtIdx->getZExtValue();
unsigned NumElts = VT.getVectorNumElements();
unsigned NumElts = VT.getVectorMinNumElements();
// If the index is a multiple of the extract element count, we can offset the
// address by the store size multiplied by the subvector index. Otherwise if
// the scalar type is byte sized, we can just use the index multiplied by
// the element size in bytes as the offset.
unsigned Offset;
if (Index % NumElts == 0)
Offset = (Index / NumElts) * VT.getStoreSize();
else if (VT.getScalarType().isByteSized())
Offset = Index * VT.getScalarType().getStoreSize();
else
return SDValue();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
// multiple of the minimum number of elements in the result type.
assert(Index % NumElts == 0 && "The extract subvector index is not a "
"multiple of the result's element count");
// It's fine to use TypeSize here as we know the offset will not be negative.
TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
@ -19359,14 +19355,21 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr =
DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL);
SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
VT.getStoreSize());
MachineMemOperand *MMO;
if (Offset.isScalable()) {
MachinePointerInfo MPI =
MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
} else
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
StoreSize);
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return NewLd;

View File

@ -64,7 +64,19 @@ define <vscale x 2 x float> @extract_hi_nxv2f32_nxv4f32(<vscale x 4 x float> %z0
ret <vscale x 2 x float> %ext
}
define <vscale x 4 x float> @load_extract_nxv4f32_nxv8f32(<vscale x 8 x float>* %p) {
; CHECK-LABEL: load_extract_nxv4f32_nxv8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
%tmp1 = load <vscale x 8 x float>, <vscale x 8 x float>* %p, align 16
%tmp2 = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float> %tmp1, i32 1)
ret <vscale x 4 x float> %tmp2
}
declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
declare <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float>, i32)
declare <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float>, i32)