[GISel][ArtifactCombiner] Relax the constraint to combine unmerge with concat_vectors

The combine of G_UNMERGE_VALUES with G_CONCAT_VECTORS used to be performed
only when the result type of the G_UNMERGE_VALUES was a vector type.
In other words, we expected the G_UNMERGE_VALUES to be effectively
the exact opposite of the G_CONCAT_VECTORS.

Lift that constraint by allowing any G_UNMERGE_VALUES to be combined
with any G_CONCAT_VECTORS, as long as the sizes of the pieces
that we merge/unmerge match.
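
For example (a minimal MIR sketch distilled from the new AArch64 test added
below), the pieces produced by the G_UNMERGE_VALUES here are scalars, not the
original <2 x s64> vectors, yet each piece lines up in size with exactly one
G_CONCAT_VECTORS source, so the pair now folds:

%2:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
%3:_(s128), %4:_(s128) = G_UNMERGE_VALUES %2(<4 x s64>)

becomes

%3:_(s128) = G_BITCAST %0(<2 x s64>)
%4:_(s128) = G_BITCAST %1(<2 x s64>)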

Differential Revision: https://reviews.llvm.org/D69288
Author: Quentin Colombet
Date: 2019-11-06 11:09:12 -08:00
commit 52af7aedfe (parent 6da58e7e0f)
5 changed files with 214 additions and 85 deletions

View File

@@ -219,31 +219,53 @@ public:
  static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
                                 LLT OpTy, LLT DestTy) {
-    if (OpTy.isVector() && DestTy.isVector())
-      return MergeOp == TargetOpcode::G_CONCAT_VECTORS;
-    if (OpTy.isVector() && !DestTy.isVector()) {
-      if (MergeOp == TargetOpcode::G_BUILD_VECTOR)
-        return true;
-      if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) {
-        if (ConvertOp == 0)
-          return true;
-        const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
-        // Don't handle scalarization with a cast that isn't in the same
-        // direction as the vector cast. This could be handled, but it would
-        // require more intermediate unmerges.
-        if (ConvertOp == TargetOpcode::G_TRUNC)
-          return DestTy.getSizeInBits() <= OpEltSize;
-        return DestTy.getSizeInBits() >= OpEltSize;
-      }
-      return false;
-    }
-    return MergeOp == TargetOpcode::G_MERGE_VALUES;
+    // Check if we found a definition that is like G_MERGE_VALUES.
+    switch (MergeOp) {
+    default:
+      return false;
+    case TargetOpcode::G_BUILD_VECTOR:
+    case TargetOpcode::G_MERGE_VALUES:
+      // The convert operation that we will need to insert is
+      // going to convert the input of that type of instruction (scalar)
+      // to the destination type (DestTy).
+      // The conversion needs to stay in the same domain (scalar to scalar
+      // and vector to vector), so if we were to allow to fold the merge
+      // we would need to insert some bitcasts.
+      // E.g.,
+      // <2 x s16> = build_vector s16, s16
+      // <2 x s32> = zext <2 x s16>
+      // <2 x s16>, <2 x s16> = unmerge <2 x s32>
+      //
+      // As is the folding would produce:
+      // <2 x s16> = zext s16 <-- scalar to vector
+      // <2 x s16> = zext s16 <-- scalar to vector
+      // Which is invalid.
+      // Instead we would want to generate:
+      // s32 = zext s16
+      // <2 x s16> = bitcast s32
+      // s32 = zext s16
+      // <2 x s16> = bitcast s32
+      //
+      // That is not done yet.
+      if (ConvertOp == 0)
+        return true;
+      return !DestTy.isVector();
+    case TargetOpcode::G_CONCAT_VECTORS: {
+      if (ConvertOp == 0)
+        return true;
+      if (!DestTy.isVector())
+        return false;
+      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
+      // Don't handle scalarization with a cast that isn't in the same
+      // direction as the vector cast. This could be handled, but it would
+      // require more intermediate unmerges.
+      if (ConvertOp == TargetOpcode::G_TRUNC)
+        return DestTy.getSizeInBits() <= OpEltSize;
+      return DestTy.getSizeInBits() >= OpEltSize;
+    }
+    }
  }

  bool tryCombineMerges(MachineInstr &MI,
@@ -335,6 +357,10 @@ public:
    } else {
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+
+      if (!ConvertOp && DestTy != MergeSrcTy)
+        ConvertOp = TargetOpcode::G_BITCAST;
+
      if (ConvertOp) {
        Builder.setInstr(MI);
@@ -347,10 +373,10 @@ public:
        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }
-      // FIXME: is a COPY appropriate if the types mismatch? We know both
-      // registers are allocatable by now.
-      if (DestTy != MergeSrcTy)
-        return false;
+
+      assert(DestTy == MergeSrcTy &&
+             "Bitcast and the other kinds of conversions should "
+             "have happened earlier");

      for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
        MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
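
Note: the following is a rough, self-contained C++ model of the size check
the relaxed combine relies on; the names and signature are hypothetical, not
the actual combiner API. Each G_UNMERGE_VALUES result must cover exactly one
G_CONCAT_VECTORS source (fold to bitcasts), a whole fraction of one (fold to
smaller unmerges), or several whole sources (fold to smaller merges);
otherwise the combine does not fire.

#include <cassert>

enum class Rewrite { Bitcast, SmallerUnmerge, SmallerMerge, None };

// DestBits is the size of one G_UNMERGE_VALUES result; SrcBits is the size
// of one G_CONCAT_VECTORS source operand.
static Rewrite classifyUnmergeOfConcat(unsigned DestBits, unsigned SrcBits) {
  if (DestBits == SrcBits)
    return Rewrite::Bitcast;        // one result per source: bitcast each one
  if (DestBits < SrcBits && SrcBits % DestBits == 0)
    return Rewrite::SmallerUnmerge; // unmerge each source separately
  if (DestBits > SrcBits && DestBits % SrcBits == 0)
    return Rewrite::SmallerMerge;   // merge several sources per result
  return Rewrite::None;             // sizes do not line up
}

int main() {
  // <4 x s64> = G_CONCAT_VECTORS <2 x s64>, <2 x s64>, unmerged to two s128s:
  assert(classifyUnmergeOfConcat(128, 128) == Rewrite::Bitcast);
  // The same concat unmerged to four s64 values:
  assert(classifyUnmergeOfConcat(64, 128) == Rewrite::SmallerUnmerge);
  return 0;
}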

View File

@@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple aarch64-apple-ios -stop-after=instruction-select %s -o - | FileCheck %s
; Check that packing incoming arguments into a big vector type
; and unpacking them in registers for the call to @bar gets selected as just
; simple copies. I.e., we don't artificially try to keep the big
; vector (%vec) alive.
define void @shuffle_to_concat_vector(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: name: shuffle_to_concat_vector
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK: $q0 = COPY [[COPY]]
; CHECK: $q1 = COPY [[COPY1]]
; CHECK: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $q0, implicit $q1
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK: RET_ReallyLR
%vec = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @bar(<4 x i64> %vec)
ret void
}
declare void @bar(<4 x i64> %vec)

View File

@@ -1,15 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
---- |
-  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-  target triple = "aarch64--"
-  define void @test_unmerge() {
-  entry:
-    ret void
-  }
-  define void @test_legal_const_ext() { ret void }
-...
+# RUN: llc -O0 -mtriple aarch64-- -run-pass=legalizer %s -o - | FileCheck %s
---
name: test_unmerge
@@ -44,3 +34,60 @@ body: |
%4:_(s32) = G_ANYEXT %3(s1)
$w0 = COPY %4(s32)
...
# Check that the artifact combiner can get rid of the big
# vector type (4 x s64) by combining the G_UNMERGE_VALUES
# with the G_CONCAT_VECTORS and turning that into bitcasts.
---
name: concat_vectors_unmerge_to_bitcast
tracksRegLiveness: true
body: |
bb.0:
liveins: $q0, $q1
; CHECK-LABEL: name: concat_vectors_unmerge_to_bitcast
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[COPY1]](<2 x s64>)
; CHECK: $q0 = COPY [[BITCAST]](s128)
; CHECK: $q1 = COPY [[BITCAST1]](s128)
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
%3:_(s128), %4:_(s128) = G_UNMERGE_VALUES %2(<4 x s64>)
$q0 = COPY %3(s128)
$q1 = COPY %4(s128)
...
# Check that the artifact combiner can get rid of the big
# vector type (4 x s64) by combining the G_UNMERGE_VALUES
# with the G_CONCAT_VECTORS and turning that into smaller
# 2x64-bit G_UNMERGE_VALUES.
---
name: concat_vectors_unmerge_to_unmerge
tracksRegLiveness: true
body: |
bb.0:
liveins: $q0, $q1
; CHECK-LABEL: name: concat_vectors_unmerge_to_unmerge
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
; CHECK: $x0 = COPY [[UV]](s64)
; CHECK: $x1 = COPY [[UV1]](s64)
; CHECK: $x2 = COPY [[UV2]](s64)
; CHECK: $x3 = COPY [[UV3]](s64)
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
%3:_(s64), %4:_(s64), %5:_(s64), %6:_(s64) = G_UNMERGE_VALUES %2(<4 x s64>)
$x0 = COPY %3(s64)
$x1 = COPY %4(s64)
$x2 = COPY %5(s64)
$x3 = COPY %6(s64)
...

View File

@@ -225,25 +225,17 @@ body: |
; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v4s1_of_concat_vectors_v4s32_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
-; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
-; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
-; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
-; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
-; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
-; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
-; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
-; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
-; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
-; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32)
-; CHECK: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32)
-; CHECK: $vgpr0 = COPY [[ASHR]](s32)
-; CHECK: $vgpr1 = COPY [[ASHR1]](s32)
-; CHECK: $vgpr2 = COPY [[ASHR2]](s32)
-; CHECK: $vgpr3 = COPY [[ASHR3]](s32)
+; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s32>)
+; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1), [[UV3:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC]](<4 x s1>)
+; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s1)
+; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s1)
+; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s1)
+; CHECK: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s1)
+; CHECK: $vgpr0 = COPY [[SEXT]](s32)
+; CHECK: $vgpr1 = COPY [[SEXT1]](s32)
+; CHECK: $vgpr2 = COPY [[SEXT2]](s32)
+; CHECK: $vgpr3 = COPY [[SEXT3]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -324,13 +316,10 @@ body: |
; CHECK-LABEL: name: test_unmerge_values_s32_of_trunc_concat_vectors_v2s64_v2s64
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
-; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
-; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
-; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
-; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32)
+; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>)
+; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s32>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s64>)
+; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](<4 x s32>)
+; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
@@ -347,12 +336,14 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
; CHECK: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
-; CHECK: S_ENDPGM 0, implicit [[SEXT]](s64), implicit [[SEXT1]](s64), implicit [[SEXT2]](s64), implicit [[SEXT3]](s64)
+; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64)
+; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
+; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -369,12 +360,14 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
; CHECK: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
-; CHECK: S_ENDPGM 0, implicit [[ZEXT]](s64), implicit [[ZEXT1]](s64), implicit [[ZEXT2]](s64), implicit [[ZEXT3]](s64)
+; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64)
+; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
+; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -391,12 +384,14 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
-; CHECK: S_ENDPGM 0, implicit [[ANYEXT]](s64), implicit [[ANYEXT1]](s64), implicit [[ANYEXT2]](s64), implicit [[ANYEXT3]](s64)
+; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64)
+; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
+; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -412,13 +407,10 @@ body: |
; CHECK-LABEL: name: test_unmerge_values_s8_of_trunc_v4s16_concat_vectors_v2s32_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
-; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
-; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
-; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
-; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit %6:_(s8), implicit %7:_(s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8), implicit %10:_(s8), implicit %11:_(s8)
+; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s32>)
+; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>)
+; CHECK: S_ENDPGM 0, implicit [[UV]](s8), implicit [[UV1]](s8), implicit [[UV2]](s8), implicit [[UV3]](s8), implicit [[UV4]](s8), implicit [[UV5]](s8), implicit [[UV6]](s8), implicit [[UV7]](s8)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -497,3 +489,42 @@ body: |
%6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %5
S_ENDPGM 0, implicit %6, implicit %7
...
# To properly simplify that one, we would need to insert a bitcast
# after the G_ZEXT.
# i.e.,
# s64 = zext <2 x s16> <-- invalid
# vs.
# <2 x s32> = zext <2 x s16>
# s64 = bitcast <2 x s32> <-- we are missing the code to do that
---
name: test_unmerge_values_s128_of_zext_of_concat_vectors
body: |
bb.0:
; CHECK-LABEL: name: test_unmerge_values_s128_of_zext_of_concat_vectors
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
%3:_(<4 x s32>) = G_ZEXT %2
%4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3
S_ENDPGM 0, implicit %4, implicit %5
...

View File

@@ -713,16 +713,16 @@ body: |
; CHECK: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]]
; CHECK: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]]
; CHECK: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]]
-; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32)
-; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32), [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32)
-; CHECK: [[UV128:%[0-9]+]]:_(<16 x s32>), [[UV129:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-; CHECK: [[UV130:%[0-9]+]]:_(<16 x s32>), [[UV131:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<32 x s32>)
+; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32)
+; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32)
+; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32)
+; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32)
; CHECK: G_BR %bb.2
; CHECK: bb.2:
-; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[UV128]](<16 x s32>), %bb.1
-; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[UV129]](<16 x s32>), %bb.1
-; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, [[UV130]](<16 x s32>), %bb.1
-; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, [[UV131]](<16 x s32>), %bb.1
+; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1
+; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1
+; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1
+; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>)
; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>)
bb.0: