GlobalISel: Revisit users of other merge opcodes in artifact combiner
The artifact combiner searches the uses of G_MERGE_VALUES for unmerge/trunc instructions that need further combining. This also needs to handle the vector merge opcodes (G_BUILD_VECTOR and G_CONCAT_VECTORS) the same way. This fixes leaving behind some merge/unmerge pairs I expected to be removed, which were only removed if the legalizer was run a second time.
commit fe171908e9 (parent e0ec7a0206)
@@ -750,6 +750,8 @@ public:
       Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs, WrapperObserver);
       break;
     case TargetOpcode::G_MERGE_VALUES:
+    case TargetOpcode::G_BUILD_VECTOR:
+    case TargetOpcode::G_CONCAT_VECTORS:
       // If any of the users of this merge are an unmerge, then add them to the
       // artifact worklist in case there's folding that can be done looking up.
       for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
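
For orientation, here is a hedged, self-contained sketch of the revisit step this hunk extends. It is illustrative rather than the verbatim LLVM implementation: the helper name revisitMergeUsers is made up, and pushing the def onto UpdatedDefs is an assumption about how the surrounding combiner requeues work; MI, MRI, and UpdatedDefs are the names visible in the context lines above.

// Hedged sketch (not LLVM's actual code): when a merge-like artifact has an
// unmerge or trunc user, remember its def so the artifact combiner revisits
// those users and can fold the merge/unmerge pair in the same legalizer run.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

static void revisitMergeUsers(MachineInstr &MI, MachineRegisterInfo &MRI,
                              SmallVectorImpl<Register> &UpdatedDefs) {
  switch (MI.getOpcode()) {
  default:
    return;
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:   // newly handled like G_MERGE_VALUES
  case TargetOpcode::G_CONCAT_VECTORS: // newly handled like G_MERGE_VALUES
    // If any user of this merge is an unmerge (or trunc), remember the def so
    // those users get looked at again and the pair can be folded away.
    for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
      if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
          U.getOpcode() == TargetOpcode::G_TRUNC) {
        UpdatedDefs.push_back(MI.getOperand(0).getReg()); // assumed requeue hook
        break;
      }
    }
  }
}

The functional change is just the two extra case labels: vector merges now take the same revisit path as G_MERGE_VALUES, so an unmerge of a G_BUILD_VECTOR or G_CONCAT_VECTORS is reconsidered immediately instead of surviving until the legalizer is run a second time.
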
@@ -0,0 +1,31 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+
+# The G_ZEXT and G_SHL will be scalarized, introducing a
+# G_UNMERGE_VALUES of G_BUILD_VECTOR. The artifact combiner should
+# eliminate the pair.
+---
+name: revisit_build_vector_unmerge_user
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX9-LABEL: name: revisit_build_vector_unmerge_user
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_CONSTANT i32 2
+    %2:_(<2 x s32>) = G_BUILD_VECTOR %1, %1
+    %3:_(<2 x s64>) = G_ZEXT %0
+    %4:_(<2 x s64>) = G_SHL %3, %2
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+...
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+
+# The G_SHL will be split into <2 x s16>, introducing a
+# G_UNMERGE_VALUES of G_CONCAT_VECTORS. The artifact combiner should
+# eliminate the pair.
+---
+name: revisit_concat_vectors_unmerge_user
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX9-LABEL: name: revisit_concat_vectors_unmerge_user
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[C3]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SHL]](<2 x s16>), [[SHL1]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<4 x s8>) = G_BITCAST %0
+    %2:_(s16) = G_CONSTANT i16 2
+    %3:_(<4 x s16>) = G_BUILD_VECTOR %2, %2, %2, %2
+    %4:_(<4 x s16>) = G_ANYEXT %1
+    %5:_(<4 x s16>) = G_SHL %4, %3
+    $vgpr0_vgpr1 = COPY %5
+...
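
Both new tests check that the merge/unmerge pair introduced while splitting the operation disappears. The fold itself is the usual unmerge-of-merge combine; below is a hedged sketch of the simple 1:1 case (hypothetical helper, simplified relative to whatever the artifact combiner actually does): each unmerged def is rewritten to the corresponding merge source and the unmerge is erased.

// Hedged sketch, 1:1 element case only. Assumes Merge is the
// G_MERGE_VALUES/G_BUILD_VECTOR/G_CONCAT_VECTORS feeding Unmerge and that each
// unmerged def corresponds to exactly one merge source operand.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

static bool foldUnmergeOfMerge(MachineInstr &Unmerge, MachineInstr &Merge,
                               MachineRegisterInfo &MRI) {
  unsigned NumDefs = Unmerge.getNumOperands() - 1; // operands: defs..., source
  if (Merge.getNumOperands() - 1 != NumDefs)       // operands: def, sources...
    return false;
  for (unsigned I = 0; I != NumDefs; ++I)
    MRI.replaceRegWith(Unmerge.getOperand(I).getReg(),
                       Merge.getOperand(I + 1).getReg());
  Unmerge.eraseFromParent();
  return true;
}
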
@@ -194,9 +194,11 @@ body: |
     ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
     ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16)
     ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32)
-    ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV4]](<2 x s16>), implicit [[UV5]](<2 x s16>), implicit [[UV6]](<2 x s16>), implicit [[UV7]](<2 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT1]](s32)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT2]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT3]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -341,9 +343,7 @@ body: |
     ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
     ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
     ; CHECK: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s64), implicit [[SEXT1]](s64), implicit [[SEXT2]](s64), implicit [[SEXT3]](s64)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -365,9 +365,7 @@ body: |
     ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
     ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
     ; CHECK: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[ZEXT]](s64), implicit [[ZEXT1]](s64), implicit [[ZEXT2]](s64), implicit [[ZEXT3]](s64)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -389,9 +387,7 @@ body: |
     ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
     ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
     ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s64>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV4]](s64), implicit [[UV5]](s64), implicit [[UV6]](s64), implicit [[UV7]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[ANYEXT]](s64), implicit [[ANYEXT1]](s64), implicit [[ANYEXT2]](s64), implicit [[ANYEXT3]](s64)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -477,9 +473,11 @@ body: |
     ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>)
     ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY2]](<2 x s32>)
     ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY3]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC2]](<2 x s16>)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC3]](<2 x s16>)
+    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32), implicit [[BITCAST2]](s32), implicit [[BITCAST3]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -503,9 +501,9 @@ body: |
     ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
     ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
     ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
+    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -544,9 +542,9 @@ body: |
     ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64), implicit [[MV1]](s64)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -624,18 +624,16 @@ body: |
     ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
     ; SI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32)
-    ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32)
     ; SI: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
     ; SI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]]
     ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
     ; SI: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s64>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_copysign_v2s64_v2s32
     ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -647,18 +645,16 @@ body: |
     ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
     ; VI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32)
-    ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32)
+    ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32)
     ; VI: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
     ; VI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]]
     ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
     ; VI: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_copysign_v2s64_v2s32
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -670,18 +666,16 @@ body: |
     ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
     ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
-    ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32)
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32)
     ; GFX9: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
     ; GFX9: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]]
     ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
     ; GFX9: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s64>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %2:_(<2 x s64>) = G_FCOPYSIGN %0, %1
@@ -906,28 +906,28 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX9-NEXT: s_lshr_b32 s4, s0, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
-; GFX9-NEXT: s_lshr_b32 s5, s1, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4
+; GFX9-NEXT: s_lshr_b32 s4, s0, 16
+; GFX9-NEXT: s_mov_b32 s3, 0x80008
+; GFX9-NEXT: s_lshr_b32 s5, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
+; GFX9-NEXT: s_lshr_b32 s4, s2, 16
 ; GFX9-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX9-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s0, 16
-; GFX9-NEXT: s_mov_b32 s4, 0x80008
-; GFX9-NEXT: s_lshl_b32 s0, s0, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s5
-; GFX9-NEXT: s_lshr_b32 s5, s2, 16
-; GFX9-NEXT: s_lshl_b32 s2, s2, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s5
 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
-; GFX9-NEXT: s_pack_ll_b32_b16 s3, s6, s7
-; GFX9-NEXT: s_lshl_b32 s1, s1, s4
+; GFX9-NEXT: s_lshl_b32 s2, s2, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7
+; GFX9-NEXT: s_lshl_b32 s1, s1, s3
 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s3, 16
+; GFX9-NEXT: s_lshr_b32 s5, s4, 16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: s_lshl_b32 s3, s3, s4
+; GFX9-NEXT: s_lshl_b32 s3, s4, s3
 ; GFX9-NEXT: s_lshl_b32 s4, s5, 8
 ; GFX9-NEXT: v_pk_add_i16 v0, s0, v0 clamp
 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
@@ -956,25 +956,25 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
-; GFX10-NEXT: s_lshr_b32 s4, s0, 16
-; GFX10-NEXT: s_mov_b32 s3, 0x80008
-; GFX10-NEXT: s_pack_ll_b32_b16 s5, s6, s7
-; GFX10-NEXT: s_lshr_b32 s6, s1, 16
-; GFX10-NEXT: s_lshl_b32 s0, s0, s3
-; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s1, s1, s3
-; GFX10-NEXT: s_lshl_b32 s6, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
-; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s6
-; GFX10-NEXT: s_lshr_b32 s4, s2, 16
-; GFX10-NEXT: s_lshr_b32 s6, s5, 16
+; GFX10-NEXT: s_lshr_b32 s8, s0, 16
+; GFX10-NEXT: s_lshr_b32 s5, s1, 16
+; GFX10-NEXT: s_mov_b32 s2, 0x80008
+; GFX10-NEXT: s_lshl_b32 s8, s8, 8
+; GFX10-NEXT: s_lshl_b32 s0, s0, s2
+; GFX10-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10-NEXT: s_lshl_b32 s5, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4
+; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7
+; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
+; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8
+; GFX10-NEXT: s_lshr_b32 s4, s3, 16
+; GFX10-NEXT: s_lshr_b32 s5, s6, 16
 ; GFX10-NEXT: v_pk_add_i16 v0, s0, s1 clamp
-; GFX10-NEXT: s_lshl_b32 s2, s2, s3
+; GFX10-NEXT: s_lshl_b32 s3, s3, s2
 ; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s0, s5, s3
-; GFX10-NEXT: s_lshl_b32 s1, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX10-NEXT: s_lshl_b32 s0, s6, s2
+; GFX10-NEXT: s_lshl_b32 s1, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
 ; GFX10-NEXT: v_pk_add_i16 v1, s2, s0 clamp
@@ -906,28 +906,28 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX9-NEXT: s_lshr_b32 s4, s0, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
-; GFX9-NEXT: s_lshr_b32 s5, s1, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4
+; GFX9-NEXT: s_lshr_b32 s4, s0, 16
+; GFX9-NEXT: s_mov_b32 s3, 0x80008
+; GFX9-NEXT: s_lshr_b32 s5, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
+; GFX9-NEXT: s_lshr_b32 s4, s2, 16
 ; GFX9-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX9-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s0, 16
-; GFX9-NEXT: s_mov_b32 s4, 0x80008
-; GFX9-NEXT: s_lshl_b32 s0, s0, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s5
-; GFX9-NEXT: s_lshr_b32 s5, s2, 16
-; GFX9-NEXT: s_lshl_b32 s2, s2, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s5
 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
-; GFX9-NEXT: s_pack_ll_b32_b16 s3, s6, s7
-; GFX9-NEXT: s_lshl_b32 s1, s1, s4
+; GFX9-NEXT: s_lshl_b32 s2, s2, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7
+; GFX9-NEXT: s_lshl_b32 s1, s1, s3
 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s3, 16
+; GFX9-NEXT: s_lshr_b32 s5, s4, 16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: s_lshl_b32 s3, s3, s4
+; GFX9-NEXT: s_lshl_b32 s3, s4, s3
 ; GFX9-NEXT: s_lshl_b32 s4, s5, 8
 ; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp
 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
@@ -956,25 +956,25 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
-; GFX10-NEXT: s_lshr_b32 s4, s0, 16
-; GFX10-NEXT: s_mov_b32 s3, 0x80008
-; GFX10-NEXT: s_pack_ll_b32_b16 s5, s6, s7
-; GFX10-NEXT: s_lshr_b32 s6, s1, 16
-; GFX10-NEXT: s_lshl_b32 s0, s0, s3
-; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s1, s1, s3
-; GFX10-NEXT: s_lshl_b32 s6, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
-; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s6
-; GFX10-NEXT: s_lshr_b32 s4, s2, 16
-; GFX10-NEXT: s_lshr_b32 s6, s5, 16
+; GFX10-NEXT: s_lshr_b32 s8, s0, 16
+; GFX10-NEXT: s_lshr_b32 s5, s1, 16
+; GFX10-NEXT: s_mov_b32 s2, 0x80008
+; GFX10-NEXT: s_lshl_b32 s8, s8, 8
+; GFX10-NEXT: s_lshl_b32 s0, s0, s2
+; GFX10-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10-NEXT: s_lshl_b32 s5, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4
+; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7
+; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
+; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8
+; GFX10-NEXT: s_lshr_b32 s4, s3, 16
+; GFX10-NEXT: s_lshr_b32 s5, s6, 16
 ; GFX10-NEXT: v_pk_sub_i16 v0, s0, s1 clamp
-; GFX10-NEXT: s_lshl_b32 s2, s2, s3
+; GFX10-NEXT: s_lshl_b32 s3, s3, s2
 ; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s0, s5, s3
-; GFX10-NEXT: s_lshl_b32 s1, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX10-NEXT: s_lshl_b32 s0, s6, s2
+; GFX10-NEXT: s_lshl_b32 s1, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
 ; GFX10-NEXT: v_pk_sub_i16 v1, s2, s0 clamp
@@ -622,28 +622,28 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX9-NEXT: s_lshr_b32 s4, s0, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
-; GFX9-NEXT: s_lshr_b32 s5, s1, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4
+; GFX9-NEXT: s_lshr_b32 s4, s0, 16
+; GFX9-NEXT: s_mov_b32 s3, 0x80008
+; GFX9-NEXT: s_lshr_b32 s5, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
+; GFX9-NEXT: s_lshr_b32 s4, s2, 16
 ; GFX9-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX9-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s0, 16
-; GFX9-NEXT: s_mov_b32 s4, 0x80008
-; GFX9-NEXT: s_lshl_b32 s0, s0, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s5
-; GFX9-NEXT: s_lshr_b32 s5, s2, 16
-; GFX9-NEXT: s_lshl_b32 s2, s2, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s5
 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
-; GFX9-NEXT: s_pack_ll_b32_b16 s3, s6, s7
-; GFX9-NEXT: s_lshl_b32 s1, s1, s4
+; GFX9-NEXT: s_lshl_b32 s2, s2, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7
+; GFX9-NEXT: s_lshl_b32 s1, s1, s3
 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s3, 16
+; GFX9-NEXT: s_lshr_b32 s5, s4, 16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: s_lshl_b32 s3, s3, s4
+; GFX9-NEXT: s_lshl_b32 s3, s4, s3
 ; GFX9-NEXT: s_lshl_b32 s4, s5, 8
 ; GFX9-NEXT: v_pk_add_u16 v0, s0, v0 clamp
 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
@@ -672,25 +672,25 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
-; GFX10-NEXT: s_lshr_b32 s4, s0, 16
-; GFX10-NEXT: s_mov_b32 s3, 0x80008
-; GFX10-NEXT: s_pack_ll_b32_b16 s5, s6, s7
-; GFX10-NEXT: s_lshr_b32 s6, s1, 16
-; GFX10-NEXT: s_lshl_b32 s0, s0, s3
-; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s1, s1, s3
-; GFX10-NEXT: s_lshl_b32 s6, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
-; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s6
-; GFX10-NEXT: s_lshr_b32 s4, s2, 16
-; GFX10-NEXT: s_lshr_b32 s6, s5, 16
+; GFX10-NEXT: s_lshr_b32 s8, s0, 16
+; GFX10-NEXT: s_lshr_b32 s5, s1, 16
+; GFX10-NEXT: s_mov_b32 s2, 0x80008
+; GFX10-NEXT: s_lshl_b32 s8, s8, 8
+; GFX10-NEXT: s_lshl_b32 s0, s0, s2
+; GFX10-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10-NEXT: s_lshl_b32 s5, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4
+; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7
+; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
+; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8
+; GFX10-NEXT: s_lshr_b32 s4, s3, 16
+; GFX10-NEXT: s_lshr_b32 s5, s6, 16
 ; GFX10-NEXT: v_pk_add_u16 v0, s0, s1 clamp
-; GFX10-NEXT: s_lshl_b32 s2, s2, s3
+; GFX10-NEXT: s_lshl_b32 s3, s3, s2
 ; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s0, s5, s3
-; GFX10-NEXT: s_lshl_b32 s1, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX10-NEXT: s_lshl_b32 s0, s6, s2
+; GFX10-NEXT: s_lshl_b32 s1, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
 ; GFX10-NEXT: v_pk_add_u16 v1, s2, s0 clamp
@@ -606,28 +606,28 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX9-NEXT: s_lshr_b32 s4, s0, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
-; GFX9-NEXT: s_lshr_b32 s5, s1, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4
+; GFX9-NEXT: s_lshr_b32 s4, s0, 16
+; GFX9-NEXT: s_mov_b32 s3, 0x80008
+; GFX9-NEXT: s_lshr_b32 s5, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
+; GFX9-NEXT: s_lshr_b32 s4, s2, 16
 ; GFX9-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX9-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s0, 16
-; GFX9-NEXT: s_mov_b32 s4, 0x80008
-; GFX9-NEXT: s_lshl_b32 s0, s0, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s5
-; GFX9-NEXT: s_lshr_b32 s5, s2, 16
-; GFX9-NEXT: s_lshl_b32 s2, s2, s4
-; GFX9-NEXT: s_lshl_b32 s5, s5, 8
-; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s5
 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
-; GFX9-NEXT: s_pack_ll_b32_b16 s3, s6, s7
-; GFX9-NEXT: s_lshl_b32 s1, s1, s4
+; GFX9-NEXT: s_lshl_b32 s2, s2, s3
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7
+; GFX9-NEXT: s_lshl_b32 s1, s1, s3
 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX9-NEXT: s_lshr_b32 s5, s3, 16
+; GFX9-NEXT: s_lshr_b32 s5, s4, 16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: s_lshl_b32 s3, s3, s4
+; GFX9-NEXT: s_lshl_b32 s3, s4, s3
 ; GFX9-NEXT: s_lshl_b32 s4, s5, 8
 ; GFX9-NEXT: v_pk_sub_u16 v0, s0, v0 clamp
 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
@@ -656,25 +656,25 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16
 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24
 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
-; GFX10-NEXT: s_lshr_b32 s4, s0, 16
-; GFX10-NEXT: s_mov_b32 s3, 0x80008
-; GFX10-NEXT: s_pack_ll_b32_b16 s5, s6, s7
-; GFX10-NEXT: s_lshr_b32 s6, s1, 16
-; GFX10-NEXT: s_lshl_b32 s0, s0, s3
-; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s1, s1, s3
-; GFX10-NEXT: s_lshl_b32 s6, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
-; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s6
-; GFX10-NEXT: s_lshr_b32 s4, s2, 16
-; GFX10-NEXT: s_lshr_b32 s6, s5, 16
+; GFX10-NEXT: s_lshr_b32 s8, s0, 16
+; GFX10-NEXT: s_lshr_b32 s5, s1, 16
+; GFX10-NEXT: s_mov_b32 s2, 0x80008
+; GFX10-NEXT: s_lshl_b32 s8, s8, 8
+; GFX10-NEXT: s_lshl_b32 s0, s0, s2
+; GFX10-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10-NEXT: s_lshl_b32 s5, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4
+; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7
+; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
+; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8
+; GFX10-NEXT: s_lshr_b32 s4, s3, 16
+; GFX10-NEXT: s_lshr_b32 s5, s6, 16
 ; GFX10-NEXT: v_pk_sub_u16 v0, s0, s1 clamp
-; GFX10-NEXT: s_lshl_b32 s2, s2, s3
+; GFX10-NEXT: s_lshl_b32 s3, s3, s2
 ; GFX10-NEXT: s_lshl_b32 s4, s4, 8
-; GFX10-NEXT: s_lshl_b32 s0, s5, s3
-; GFX10-NEXT: s_lshl_b32 s1, s6, 8
-; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
+; GFX10-NEXT: s_lshl_b32 s0, s6, s2
+; GFX10-NEXT: s_lshl_b32 s1, s5, 8
+; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
 ; GFX10-NEXT: v_pk_sub_u16 v1, s2, s0 clamp