forked from OSchip/llvm-project
[GISel][ArtifactCombiner] Relax the constraint to combine unmerge with concat_vectors
The combine G_UNMERGE_VALUES with G_CONCAT_VECTORS used to only be performed when the result type of the G_UNMERGE_VALUES was a vector type. In other words, we were expecting that the G_UNMERGE_VALUES was effectively the exact opposite of the G_CONCAT_VECTORS. Lift that constraint by allowing any G_UNMERGE_VALUES to be combined with any G_CONCAT_VECTORS (as long as the size of the different pieces that we merge/unmerge match). Differential Revision: https://reviews.llvm.org/D69288
This commit is contained in:
parent
6da58e7e0f
commit
52af7aedfe
|
@ -219,31 +219,53 @@ public:
|
|||
|
||||
static unsigned canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
|
||||
LLT OpTy, LLT DestTy) {
|
||||
if (OpTy.isVector() && DestTy.isVector())
|
||||
return MergeOp == TargetOpcode::G_CONCAT_VECTORS;
|
||||
|
||||
if (OpTy.isVector() && !DestTy.isVector()) {
|
||||
if (MergeOp == TargetOpcode::G_BUILD_VECTOR)
|
||||
return true;
|
||||
|
||||
if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) {
|
||||
if (ConvertOp == 0)
|
||||
return true;
|
||||
|
||||
const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
|
||||
|
||||
// Don't handle scalarization with a cast that isn't in the same
|
||||
// direction as the vector cast. This could be handled, but it would
|
||||
// require more intermediate unmerges.
|
||||
if (ConvertOp == TargetOpcode::G_TRUNC)
|
||||
return DestTy.getSizeInBits() <= OpEltSize;
|
||||
return DestTy.getSizeInBits() >= OpEltSize;
|
||||
}
|
||||
|
||||
// Check if we found a definition that is like G_MERGE_VALUES.
|
||||
switch (MergeOp) {
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
case TargetOpcode::G_BUILD_VECTOR:
|
||||
case TargetOpcode::G_MERGE_VALUES:
|
||||
// The convert operation that we will need to insert is
|
||||
// going to convert the input of that type of instruction (scalar)
|
||||
// to the destination type (DestTy).
|
||||
// The conversion needs to stay in the same domain (scalar to scalar
|
||||
// and vector to vector), so if we were to allow to fold the merge
|
||||
// we would need to insert some bitcasts.
|
||||
// E.g.,
|
||||
// <2 x s16> = build_vector s16, s16
|
||||
// <2 x s32> = zext <2 x s16>
|
||||
// <2 x s16>, <2 x s16> = unmerge <2 x s32>
|
||||
//
|
||||
// As is the folding would produce:
|
||||
// <2 x s16> = zext s16 <-- scalar to vector
|
||||
// <2 x s16> = zext s16 <-- scalar to vector
|
||||
// Which is invalid.
|
||||
// Instead we would want to generate:
|
||||
// s32 = zext s16
|
||||
// <2 x s16> = bitcast s32
|
||||
// s32 = zext s16
|
||||
// <2 x s16> = bitcast s32
|
||||
//
|
||||
// That is not done yet.
|
||||
if (ConvertOp == 0)
|
||||
return true;
|
||||
return !DestTy.isVector();
|
||||
case TargetOpcode::G_CONCAT_VECTORS: {
|
||||
if (ConvertOp == 0)
|
||||
return true;
|
||||
if (!DestTy.isVector())
|
||||
return false;
|
||||
|
||||
return MergeOp == TargetOpcode::G_MERGE_VALUES;
|
||||
const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
|
||||
|
||||
// Don't handle scalarization with a cast that isn't in the same
|
||||
// direction as the vector cast. This could be handled, but it would
|
||||
// require more intermediate unmerges.
|
||||
if (ConvertOp == TargetOpcode::G_TRUNC)
|
||||
return DestTy.getSizeInBits() <= OpEltSize;
|
||||
return DestTy.getSizeInBits() >= OpEltSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool tryCombineMerges(MachineInstr &MI,
|
||||
|
@ -335,6 +357,10 @@ public:
|
|||
|
||||
} else {
|
||||
LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
|
||||
|
||||
if (!ConvertOp && DestTy != MergeSrcTy)
|
||||
ConvertOp = TargetOpcode::G_BITCAST;
|
||||
|
||||
if (ConvertOp) {
|
||||
Builder.setInstr(MI);
|
||||
|
||||
|
@ -347,10 +373,10 @@ public:
|
|||
markInstAndDefDead(MI, *MergeI, DeadInsts);
|
||||
return true;
|
||||
}
|
||||
// FIXME: is a COPY appropriate if the types mismatch? We know both
|
||||
// registers are allocatable by now.
|
||||
if (DestTy != MergeSrcTy)
|
||||
return false;
|
||||
|
||||
assert(DestTy == MergeSrcTy &&
|
||||
"Bitcast and the other kinds of conversions should "
|
||||
"have happened earlier");
|
||||
|
||||
for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
|
||||
MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple aarch64-apple-ios -stop-after=instruction-select %s -o - | FileCheck %s
|
||||
|
||||
; Check that packing incoming arguments into a big vector type
|
||||
; and unpacking them in registers for the call to @bar gets selected as just
|
||||
; simple copies. I.e., we don't artificial try to keep the big
|
||||
; vector (%vec) alive.
|
||||
define void @shuffle_to_concat_vector(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: name: shuffle_to_concat_vector
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $q0, $q1
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
|
||||
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
|
||||
; CHECK: $q0 = COPY [[COPY]]
|
||||
; CHECK: $q1 = COPY [[COPY1]]
|
||||
; CHECK: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $q0, implicit $q1
|
||||
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
|
||||
; CHECK: RET_ReallyLR
|
||||
%vec = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void @bar(<4 x i64> %vec)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @bar(<4 x i64> %vec)
|
|
@ -1,15 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--"
|
||||
define void @test_unmerge() {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
define void @test_legal_const_ext() { ret void }
|
||||
...
|
||||
# RUN: llc -O0 -mtriple aarch64-- -run-pass=legalizer %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: test_unmerge
|
||||
|
@ -44,3 +34,60 @@ body: |
|
|||
%4:_(s32) = G_ANYEXT %3(s1)
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# Check that the artifact combiner can get rid of the big
|
||||
# vector type (4 x s64) by combining the G_UNMERGE_VALUES
|
||||
# with the G_CONCAT_VECTORS and turning that into bitcast.
|
||||
---
|
||||
name: concat_vectors_unmerge_to_bitcast
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $q0, $q1
|
||||
|
||||
; CHECK-LABEL: name: concat_vectors_unmerge_to_bitcast
|
||||
; CHECK: liveins: $q0, $q1
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
|
||||
; CHECK: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<2 x s64>)
|
||||
; CHECK: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[COPY1]](<2 x s64>)
|
||||
; CHECK: $q0 = COPY [[BITCAST]](s128)
|
||||
; CHECK: $q1 = COPY [[BITCAST1]](s128)
|
||||
%0:_(<2 x s64>) = COPY $q0
|
||||
%1:_(<2 x s64>) = COPY $q1
|
||||
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
|
||||
%3:_(s128), %4:_(s128) = G_UNMERGE_VALUES %2(<4 x s64>)
|
||||
$q0 = COPY %3(s128)
|
||||
$q1 = COPY %4(s128)
|
||||
...
|
||||
|
||||
# Check that the artifact combiner can get rid of the big
|
||||
# vector type (4 x s64) by combining the G_UNMERGE_VALUES
|
||||
# with the G_CONCAT_VECTORS and turning that into smaller
|
||||
# 2x64-bit G_UNMERGE_VALUES.
|
||||
---
|
||||
name: concat_vectors_unmerge_to_unmerge
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $q0, $q1
|
||||
|
||||
; CHECK-LABEL: name: concat_vectors_unmerge_to_unmerge
|
||||
; CHECK: liveins: $q0, $q1
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
|
||||
; CHECK: $x0 = COPY [[UV]](s64)
|
||||
; CHECK: $x1 = COPY [[UV1]](s64)
|
||||
; CHECK: $x2 = COPY [[UV2]](s64)
|
||||
; CHECK: $x3 = COPY [[UV3]](s64)
|
||||
%0:_(<2 x s64>) = COPY $q0
|
||||
%1:_(<2 x s64>) = COPY $q1
|
||||
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
|
||||
%3:_(s64), %4:_(s64), %5:_(s64), %6:_(s64) = G_UNMERGE_VALUES %2(<4 x s64>)
|
||||
$x0 = COPY %3(s64)
|
||||
$x1 = COPY %4(s64)
|
||||
$x2 = COPY %5(s64)
|
||||
$x3 = COPY %6(s64)
|
||||
...
|
||||
|
|
|
@ -225,25 +225,17 @@ body: |
|
|||
; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v4s1_of_concat_vectors_v4s32_v2s32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
|
||||
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
|
||||
; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
|
||||
; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
|
||||
; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
|
||||
; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32)
|
||||
; CHECK: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32)
|
||||
; CHECK: $vgpr0 = COPY [[ASHR]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[ASHR1]](s32)
|
||||
; CHECK: $vgpr2 = COPY [[ASHR2]](s32)
|
||||
; CHECK: $vgpr3 = COPY [[ASHR3]](s32)
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s32>)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1), [[UV3:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC]](<4 x s1>)
|
||||
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s1)
|
||||
; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s1)
|
||||
; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s1)
|
||||
; CHECK: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s1)
|
||||
; CHECK: $vgpr0 = COPY [[SEXT]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[SEXT1]](s32)
|
||||
; CHECK: $vgpr2 = COPY [[SEXT2]](s32)
|
||||
; CHECK: $vgpr3 = COPY [[SEXT3]](s32)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
|
||||
|
@ -324,13 +316,10 @@ body: |
|
|||
; CHECK-LABEL: name: test_unmerge_values_s32_of_trunc_concat_vectors_v2s64_v2s64
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
|
||||
; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32)
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s32>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s64>)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](<4 x s32>)
|
||||
; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
|
||||
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
|
||||
|
@ -347,12 +336,14 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
|
||||
; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
|
||||
; CHECK: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s64), implicit [[SEXT1]](s64), implicit [[SEXT2]](s64), implicit [[SEXT3]](s64)
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
|
||||
; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
|
||||
|
@ -369,12 +360,14 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
|
||||
; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
|
||||
; CHECK: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[ZEXT]](s64), implicit [[ZEXT1]](s64), implicit [[ZEXT2]](s64), implicit [[ZEXT3]](s64)
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
|
||||
; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
|
||||
|
@ -391,12 +384,14 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
|
||||
; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[ANYEXT]](s64), implicit [[ANYEXT1]](s64), implicit [[ANYEXT2]](s64), implicit [[ANYEXT3]](s64)
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
|
||||
; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
|
||||
|
@ -412,13 +407,10 @@ body: |
|
|||
; CHECK-LABEL: name: test_unmerge_values_s8_of_trunc_v4s16_concat_vectors_v2s32_v2s32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit %6:_(s8), implicit %7:_(s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8), implicit %10:_(s8), implicit %11:_(s8)
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s32>)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>)
|
||||
; CHECK: S_ENDPGM 0, implicit [[UV]](s8), implicit [[UV1]](s8), implicit [[UV2]](s8), implicit [[UV3]](s8), implicit [[UV4]](s8), implicit [[UV5]](s8), implicit [[UV6]](s8), implicit [[UV7]](s8)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
|
||||
|
@ -497,3 +489,42 @@ body: |
|
|||
%6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %5
|
||||
S_ENDPGM 0, implicit %6, implicit %7
|
||||
...
|
||||
|
||||
# To properly simplify that one, we would need to insert bitcast
|
||||
# after the G_ZEXT.
|
||||
# i.e.,
|
||||
# s64 = zext <2 x s16> <-- invalid
|
||||
# vs.
|
||||
# <2 x s32> = zext <2 x s16>
|
||||
# s64 = bitcast <2 x s32> <-- we are missing the code to do that
|
||||
---
|
||||
name: test_unmerge_values_s128_of_zext_of_concat_vectors
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test_unmerge_values_s128_of_zext_of_concat_vectors
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
|
||||
; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
|
||||
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
|
||||
; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr1
|
||||
%2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
|
||||
%3:_(<4 x s32>) = G_ZEXT %2
|
||||
%4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3
|
||||
S_ENDPGM 0, implicit %4, implicit %5
|
||||
...
|
||||
|
|
|
@ -713,16 +713,16 @@ body: |
|
|||
; CHECK: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]]
|
||||
; CHECK: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]]
|
||||
; CHECK: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]]
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32), [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32)
|
||||
; CHECK: [[UV128:%[0-9]+]]:_(<16 x s32>), [[UV129:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
|
||||
; CHECK: [[UV130:%[0-9]+]]:_(<16 x s32>), [[UV131:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<32 x s32>)
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32)
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32)
|
||||
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32)
|
||||
; CHECK: G_BR %bb.2
|
||||
; CHECK: bb.2:
|
||||
; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[UV128]](<16 x s32>), %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[UV129]](<16 x s32>), %bb.1
|
||||
; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, [[UV130]](<16 x s32>), %bb.1
|
||||
; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, [[UV131]](<16 x s32>), %bb.1
|
||||
; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1
|
||||
; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1
|
||||
; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>)
|
||||
; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>)
|
||||
bb.0:
|
||||
|
|
Loading…
Reference in New Issue