AMDGPU/GlobalISel: Legalize more concat_vectors

llvm-svn: 365488
This commit is contained in:
Matt Arsenault 2019-07-09 14:17:31 +00:00
parent 6bdb92d833
commit 4dd5755d01
3 changed files with 115 additions and 27 deletions

View File

@ -82,6 +82,21 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
}; };
} }
// Returns a predicate that accepts the type at index TypeIdx when it can live
// in registers: any combination of 32 or 64-bit elements, or multiples of
// v2s16, with a total size that is a multiple of 32 bits and at most 512 bits.
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];

    // The size limit applies to vectors as well as scalars. Checking it first
    // keeps oversized vectors (e.g. <32 x s32>) from being reported as legal
    // purely because of their element size.
    const unsigned Size = Ty.getSizeInBits();
    if (Size % 32 != 0 || Size > 512)
      return false;

    if (!Ty.isVector())
      return true;

    // Vectors are restricted to 32/64-bit elements, or an even number of
    // 16-bit elements so that the elements pack into whole 32-bit units.
    const unsigned EltSize = Ty.getElementType().getSizeInBits();
    return EltSize == 32 || EltSize == 64 ||
           (EltSize == 16 && Ty.getNumElements() % 2 == 0);
  };
}
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const GCNTargetMachine &TM) const GCNTargetMachine &TM)
: ST(ST_) { : ST(ST_) {
@ -102,7 +117,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT V2S16 = LLT::vector(2, 16); const LLT V2S16 = LLT::vector(2, 16);
const LLT V4S16 = LLT::vector(4, 16); const LLT V4S16 = LLT::vector(4, 16);
const LLT V8S16 = LLT::vector(8, 16);
const LLT V2S32 = LLT::vector(2, 32); const LLT V2S32 = LLT::vector(2, 32);
const LLT V3S32 = LLT::vector(3, 32); const LLT V3S32 = LLT::vector(3, 32);
@ -647,19 +661,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Query.Types[0].getScalarSizeInBits() == 64; Query.Types[0].getScalarSizeInBits() == 64;
}); });
// TODO: Support any combination of s16, s32, s64, pointer vectors.
getActionDefinitionsBuilder(G_CONCAT_VECTORS) getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{V4S32, V2S32}, .legalIf(isRegisterType(0));
{V8S32, V2S32},
{V8S32, V4S32},
{V4S64, V2S64},
{V4S16, V2S16},
{V8S16, V2S16},
{V8S16, V4S16},
{LLT::vector(4, LocalPtr), LLT::vector(2, LocalPtr)},
{LLT::vector(4, PrivatePtr), LLT::vector(2, PrivatePtr)}})
// FIXME: Should restrict maximum size, but there seems to be a missing predicate.
.legalIf(typeInSet(1, {V2S32, V4S32, V8S32,V2S16, V4S16, V8S16, LLT::vector(16, 16), V2S64}));
// Merge/Unmerge // Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {

View File

@ -639,10 +639,10 @@ body: |
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v4s64_s_v4s64 ; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v4s64_s_v4s64
; GCN: [[COPY:%[0-9]+]]:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GCN: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GCN: [[COPY1:%[0-9]+]]:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN: [[COPY1:%[0-9]+]]:sreg_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<8 x s64>) = G_CONCAT_VECTORS [[COPY]](<4 x s64>), [[COPY1]](<4 x s64>) ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[CONCAT_VECTORS]](<8 x s64>) ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]]
%0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7,
%1:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%4:sgpr(<8 x s64>) = G_CONCAT_VECTORS %0, %1 %4:sgpr(<8 x s64>) = G_CONCAT_VECTORS %0, %1
@ -683,10 +683,10 @@ body: |
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-LABEL: name: test_concat_vectors_s_v4p1_s_v2p1_s_v2p1 ; GCN-LABEL: name: test_concat_vectors_s_v4p1_s_v2p1_s_v2p1
; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: [[COPY1:%[0-9]+]]:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7
; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<4 x p1>) = G_CONCAT_VECTORS [[COPY]](<2 x p1>), [[COPY1]](<2 x p1>) ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>) ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]]
%0:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
%3:sgpr(<4 x p1>) = G_CONCAT_VECTORS %0, %1 %3:sgpr(<4 x p1>) = G_CONCAT_VECTORS %0, %1
@ -723,12 +723,12 @@ body: |
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7
; GCN-LABEL: name: test_concat_vectors_s_v8p3_s_v2p3_s_v2p3_v2p3_s_v2p3 ; GCN-LABEL: name: test_concat_vectors_s_v8p3_s_v2p3_s_v2p3_v2p3_s_v2p3
; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; GCN: [[COPY1:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
; GCN: [[COPY2:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5 ; GCN: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
; GCN: [[COPY3:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr6_sgpr7 ; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $sgpr6_sgpr7
; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<8 x p3>) = G_CONCAT_VECTORS [[COPY]](<2 x p3>), [[COPY1]](<2 x p3>), [[COPY2]](<2 x p3>), [[COPY3]](<2 x p3>) ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[CONCAT_VECTORS]](<8 x p3>) ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]]
%0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 %0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3
%2:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5 %2:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5

View File

@ -127,3 +127,88 @@ body: |
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1 %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
... ...
---
# Concatenates two <2 x p1> operands into a <4 x p1> result. The CHECK lines
# expect the G_CONCAT_VECTORS to be present in the output with the same
# operand and result types as the input.
name: concat_vectors_v2p1_v2p1
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-LABEL: name: concat_vectors_v2p1_v2p1
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p1>) = G_CONCAT_VECTORS [[COPY]](<2 x p1>), [[COPY1]](<2 x p1>)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>)
%0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(<4 x p1>) = G_CONCAT_VECTORS %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
...
---
# Concatenates two <2 x p0> operands into a <4 x p0> result. The CHECK lines
# expect the G_CONCAT_VECTORS to be present in the output with the same
# operand and result types as the input.
name: concat_vectors_v2p0_v2p0
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-LABEL: name: concat_vectors_v2p0_v2p0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p0>) = G_CONCAT_VECTORS [[COPY]](<2 x p0>), [[COPY1]](<2 x p0>)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p0>)
%0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(<4 x p0>) = G_CONCAT_VECTORS %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
...
---
# Concatenates two <2 x p3> operands into a <4 x p3> result. The CHECK lines
# expect the G_CONCAT_VECTORS to be present in the output with the same
# operand and result types as the input.
name: concat_vectors_v2p3_v2p3
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CHECK-LABEL: name: concat_vectors_v2p3_v2p3
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p3>) = G_CONCAT_VECTORS [[COPY]](<2 x p3>), [[COPY1]](<2 x p3>)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p3>)
%0:_(<2 x p3>) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
%2:_(<4 x p3>) = G_CONCAT_VECTORS %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
...
---
# Concatenates two <2 x p5> operands into a <4 x p5> result. The CHECK lines
# expect the G_CONCAT_VECTORS to be present in the output with the same
# operand and result types as the input.
name: concat_vectors_v2p5_v2p5
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CHECK-LABEL: name: concat_vectors_v2p5_v2p5
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr2_vgpr3
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p5>) = G_CONCAT_VECTORS [[COPY]](<2 x p5>), [[COPY1]](<2 x p5>)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p5>)
%0:_(<2 x p5>) = COPY $vgpr0_vgpr1
%1:_(<2 x p5>) = COPY $vgpr2_vgpr3
%2:_(<4 x p5>) = G_CONCAT_VECTORS %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
...
---
# Concatenates two <2 x p999> operands into a <4 x p999> result. The CHECK
# lines expect the G_CONCAT_VECTORS to be present in the output with the same
# operand and result types as the input.
# NOTE(review): p999 looks like a deliberately unusual address space used to
# exercise the generic pointer-vector path - confirm against the target's
# address space list.
name: concat_vectors_v2p999_v2p999
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-LABEL: name: concat_vectors_v2p999_v2p999
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p999>) = G_CONCAT_VECTORS [[COPY]](<2 x p999>), [[COPY1]](<2 x p999>)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p999>)
%0:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(<4 x p999>) = G_CONCAT_VECTORS %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
...