[AArch64][GlobalISel] Fix crash during legalization of a vector G_SELECT with scalar mask.

The lowering of vector selects needs to splat the scalar mask into a
vector first.

This was causing a crash when building oggenc in the test suite.

Differential Revision: https://reviews.llvm.org/D91655
Amara Emerson 2020-11-17 12:09:31 -08:00
parent 871f96eed3
commit 87ff156414
5 changed files with 105 additions and 2 deletions
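
For context, the failing shape is a vector G_SELECT whose predicate is a single scalar bit. A hypothetical C-level reduction (not taken from oggenc; the names and constants are illustrative) that produces this pattern:

// Hypothetical reduction of the failure mode: a scalar comparison choosing
// between two whole vectors. In GlobalISel this becomes a vector G_SELECT
// whose predicate operand is a lone s1, the case that used to crash the
// vector-select legalization.
typedef float v4f32 __attribute__((vector_size(16)));

v4f32 pick(int x, v4f32 a, v4f32 b) {
  return x == 4100 ? a : b;
}

The new MIR test at the bottom of this commit exercises the same shape directly at the G_SELECT level.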


@@ -958,6 +958,23 @@ public:
   MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
                                             ArrayRef<Register> Ops);
 
+  /// Build and insert a vector splat of a scalar \p Src using a
+  /// G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Src must have the same type as the element type of \p Dst
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src);
+
+  /// Build and insert \p Res = G_SHUFFLE_VECTOR \p Src1, \p Src2, \p Mask
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1,
+                                         const SrcOp &Src2, ArrayRef<int> Mask);
+
   /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
   ///
   /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more
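
As a usage illustration only (not part of this diff), the two new builders could be driven as in the hypothetical helper below; the insertion point, the incoming register, and the chosen types are assumptions of the sketch:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Hypothetical helper: splat a sign-extended s32 predicate into a <4 x s32>
// mask, then reverse it with an explicit shuffle mask.
static MachineInstrBuilder splatAndReverse(MachineIRBuilder &MIRBuilder,
                                           Register Mask) {
  LLT V4S32 = LLT::vector(4, 32);
  // Expands to G_IMPLICIT_DEF + G_INSERT_VECTOR_ELT + G_SHUFFLE_VECTOR with an
  // all-zero shuffle mask (see the MachineIRBuilder.cpp hunk below).
  auto Splat = MIRBuilder.buildShuffleSplat(V4S32, Mask);
  // Shuffle mask indices address the concatenation of both sources, so
  // {3, 2, 1, 0} reverses the lanes of the first source.
  return MIRBuilder.buildShuffleVector(V4S32, Splat, Splat, {3, 2, 1, 0});
}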


@@ -6217,8 +6217,23 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
   if (!DstTy.isVector())
     return UnableToLegalize;
 
-  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
+  // Vector selects can have a scalar predicate. If so, splat into a vector and
+  // finish for later legalization attempts to try again.
+  if (MaskTy.isScalar()) {
+    Register MaskElt = MaskReg;
+    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
+      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
+    // Generate a vector splat idiom to be pattern matched later.
+    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+    Observer.changingInstr(MI);
+    MI.getOperand(1).setReg(ShufSplat.getReg(0));
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+
+  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
     return UnableToLegalize;
+  }
 
   auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
   auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
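
For reference, the bitwise expansion continuing below this hunk only works when every mask lane is all-ones or all-zeros, which is why a scalar i1 predicate is first sign-extended and splatted. A standalone per-lane model of that identity (illustrative sketch, not LLVM code):

#include <cstdint>

// Per-lane model of the vector select expansion: with Mask being all-ones or
// all-zeros in a lane, (Op1 & Mask) | (Op2 & ~Mask) yields Op1 or Op2 for
// that lane, matching the NotMask/And/Or sequence built above.
static uint32_t selectLane(uint32_t Mask, uint32_t Op1, uint32_t Op2) {
  return (Op1 & Mask) | (Op2 & ~Mask);
}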


@@ -635,6 +635,33 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
   return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
 }
 
+MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
+                                                        const SrcOp &Src) {
+  LLT DstTy = Res.getLLTTy(*getMRI());
+  LLT SrcTy = Src.getLLTTy(*getMRI());
+  assert(SrcTy == DstTy.getElementType() && "Expected Src to match Dst elt ty");
+  auto UndefVec = buildUndef(DstTy);
+  auto Zero = buildConstant(LLT::scalar(64), 0);
+  auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero);
+  SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
+  return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
+                                                         const SrcOp &Src1,
+                                                         const SrcOp &Src2,
+                                                         ArrayRef<int> Mask) {
+  LLT DstTy = Res.getLLTTy(*getMRI());
+  LLT Src1Ty = Src1.getLLTTy(*getMRI());
+  LLT Src2Ty = Src2.getLLTTy(*getMRI());
+  assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+  assert(DstTy.getElementType() == Src1Ty.getElementType() &&
+         DstTy.getElementType() == Src2Ty.getElementType());
+  ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
+  return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
+      .addShuffleMask(MaskAlloc);
+}
+
 MachineInstrBuilder
 MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
   // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,


@@ -444,7 +444,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
-      .minScalarEltSameAsIf(isVector(0), 1, 0)
+      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
       .lowerIf(isVector(0));
 
   // Pointer-handling


@@ -114,3 +114,47 @@ body: |
     $q0 = COPY %5(<16 x s8>)
     RET_ReallyLR implicit $q0
 ...
+
+---
+name: scalar_mask
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT_INREG]](s32), [[C2]](s64)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK: $q0 = COPY [[OR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %3:_(s1) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<4 x s32>) = G_SELECT %3(s1), %1, %5
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...