[AArch64][GlobalISel] Legalize oversize G_EXTRACT_VECTOR_ELT sources.

Also changes the fewerElementsVectorExtractInsertVectorElt helper to use
getConstantVRegValWithLookThrough instead of m_ICst, since m_ICst doesn't
look through extends.

Differential Revision: https://reviews.llvm.org/D103227
This commit is contained in:
Amara Emerson 2021-05-26 23:28:44 -07:00
parent 6a2af607ad
commit 59a4ee9728
4 changed files with 108 additions and 3 deletions

View File

@ -3807,7 +3807,11 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
// If the index is a constant, we can really break this down as you would
// expect, and index into the target size pieces.
int64_t IdxVal;
if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
auto MaybeCst =
getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
/*HandleFConstants*/ false);
if (MaybeCst) {
IdxVal = MaybeCst->Value.getSExtValue();
// Avoid out of bounds indexing the pieces.
if (IdxVal >= VecTy.getNumElements()) {
MIRBuilder.buildUndef(DstReg);

View File

@ -639,7 +639,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return Query.Types[1].getNumElements() <= 16;
},
0, s8)
.minScalarOrElt(0, s8); // Worst case, we need at least s8.
.minScalarOrElt(0, s8) // Worst case, we need at least s8.
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s16, 8);
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
.legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));

View File

@ -116,7 +116,7 @@ define void @nonpow2_load_narrowing() {
; Currently can't handle vector lengths that aren't an exact multiple of
; natively supported vector lengths. Test that the fall-back works for those.
; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s64) = G_EXTRACT_VECTOR_ELT %{{[0-9]+}}:_(<7 x s64>), %{{[0-9]+}}:_(s64) (in function: nonpow2_vector_add_fewerelements)
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %47:_(<14 x s64>) = G_CONCAT_VECTORS %41:_(<2 x s64>), %42:_(<2 x s64>), %43:_(<2 x s64>), %44:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>) (in function: nonpow2_vector_add_fewerelements)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
define void @nonpow2_vector_add_fewerelements() {

View File

@ -147,3 +147,101 @@ body: |
$x0 = COPY %3(p0)
RET_ReallyLR
...
# Constant-index extract from a <4 x s64> formed by concatenating two
# <2 x s64> registers. The index is a G_CONSTANT i32 1 behind a G_SEXT.
# The CHECK lines expect the extract to read element 1 of the first half
# (the COPY of $q0) using a new s64 constant index.
---
name: test_eve_v4s64
body: |
bb.0:
liveins: $q0, $q1, $x0
; CHECK-LABEL: name: test_eve_v4s64
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK: %idx:_(s32) = G_CONSTANT i32 1
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
; CHECK: $x0 = COPY [[EVEC]](s64)
; CHECK: RET_ReallyLR
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
%concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
%idx:_(s32) = G_CONSTANT i32 1
%idxprom:_(s64) = G_SEXT %idx(s32)
%3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idxprom:_(s64)
$x0 = COPY %3(s64)
RET_ReallyLR
...
# Extract from a <4 x s64> concat where the index is a runtime value copied
# from $x0 rather than a constant. The CHECK lines expect both <2 x s64>
# halves to be stored to %stack.0 (the second at offset 16), the index to be
# masked with 3 and multiplied by 8, and the element to be loaded from the
# resulting G_PTR_ADD off the frame index.
---
name: test_eve_v4s64_unknown_idx
body: |
bb.0:
liveins: $q0, $q1, $x0
; CHECK-LABEL: name: test_eve_v4s64_unknown_idx
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK: %idx:_(s64) = COPY $x0
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; CHECK: G_STORE [[COPY]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store 16 into %stack.0, align 32)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
; CHECK: G_STORE [[COPY1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into %stack.0 + 16, basealign 32)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %idx, [[C1]]
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]]
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64)
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load 8)
; CHECK: $x0 = COPY [[LOAD]](s64)
; CHECK: RET_ReallyLR
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
%concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
%idx:_(s64) = COPY $x0
%3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idx:_(s64)
$x0 = COPY %3(s64)
RET_ReallyLR
...
# Constant-index extract from an <8 x s32> formed by concatenating two
# <4 x s32> registers. The index is a G_CONSTANT i32 1 behind a G_SEXT.
# The CHECK lines expect the extract to read element 1 of the first half
# (the COPY of $q0), with the s32 result returned in $w0.
---
name: test_eve_v8s32
body: |
bb.0:
liveins: $q0, $q1, $x0
; CHECK-LABEL: name: test_eve_v8s32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK: %idx:_(s32) = G_CONSTANT i32 1
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
; CHECK: $w0 = COPY [[EVEC]](s32)
; CHECK: RET_ReallyLR
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%concat:_(<8 x s32>) = G_CONCAT_VECTORS %0(<4 x s32>), %1(<4 x s32>)
%idx:_(s32) = G_CONSTANT i32 1
%idxprom:_(s64) = G_SEXT %idx(s32)
%3:_(s32) = G_EXTRACT_VECTOR_ELT %concat:_(<8 x s32>), %idxprom:_(s64)
$w0 = COPY %3(s32)
RET_ReallyLR
...
# Constant-index extract from a <16 x s16> formed by concatenating two
# <8 x s16> registers, using index 9 (a G_CONSTANT i32 behind a G_SEXT).
# The CHECK lines expect the extract to read element 1 of the second half
# (the COPY of $q1); the s16 result is then G_ANYEXT'd to s32 and returned
# in $w0.
---
name: test_eve_v16s16
body: |
bb.0:
liveins: $q0, $q1, $x0
; CHECK-LABEL: name: test_eve_v16s16
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
; CHECK: %idx:_(s32) = G_CONSTANT i32 9
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64)
; CHECK: %ext:_(s32) = G_ANYEXT [[EVEC]](s16)
; CHECK: $w0 = COPY %ext(s32)
; CHECK: RET_ReallyLR
%0:_(<8 x s16>) = COPY $q0
%1:_(<8 x s16>) = COPY $q1
%concat:_(<16 x s16>) = G_CONCAT_VECTORS %0(<8 x s16>), %1(<8 x s16>)
%idx:_(s32) = G_CONSTANT i32 9
%idxprom:_(s64) = G_SEXT %idx(s32)
%3:_(s16) = G_EXTRACT_VECTOR_ELT %concat:_(<16 x s16>), %idxprom:_(s64)
%ext:_(s32) = G_ANYEXT %3
$w0 = COPY %ext(s32)
RET_ReallyLR
...