forked from OSchip/llvm-project
AMDGPU/GlobalISel: Pack constant G_BUILD_VECTOR_TRUNCs when selecting
This commit is contained in:
parent
0481e1ae3c
commit
5819159995
|
@ -616,11 +616,6 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isZero(Register Reg, const MachineRegisterInfo &MRI) {
|
|
||||||
int64_t Val;
|
|
||||||
return mi_match(Reg, MRI, m_ICst(Val)) && Val == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
|
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
|
||||||
MachineInstr &MI) const {
|
MachineInstr &MI) const {
|
||||||
if (selectImpl(MI, *CoverageInfo))
|
if (selectImpl(MI, *CoverageInfo))
|
||||||
|
@ -645,6 +640,20 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
|
||||||
const DebugLoc &DL = MI.getDebugLoc();
|
const DebugLoc &DL = MI.getDebugLoc();
|
||||||
MachineBasicBlock *BB = MI.getParent();
|
MachineBasicBlock *BB = MI.getParent();
|
||||||
|
|
||||||
|
auto ConstSrc1 = getConstantVRegValWithLookThrough(Src1, *MRI, true, true);
|
||||||
|
if (ConstSrc1) {
|
||||||
|
auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true);
|
||||||
|
if (ConstSrc0) {
|
||||||
|
uint32_t Lo16 = static_cast<uint32_t>(ConstSrc0->Value) & 0xffff;
|
||||||
|
uint32_t Hi16 = static_cast<uint32_t>(ConstSrc1->Value) & 0xffff;
|
||||||
|
|
||||||
|
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
|
||||||
|
.addImm(Lo16 | (Hi16 << 16));
|
||||||
|
MI.eraseFromParent();
|
||||||
|
return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: This should probably be a combine somewhere
|
// TODO: This should probably be a combine somewhere
|
||||||
// (build_vector_trunc $src0, undef -> copy $src0
|
// (build_vector_trunc $src0, undef -> copy $src0
|
||||||
MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
|
MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
|
||||||
|
@ -686,7 +695,7 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
|
||||||
} else if (Shift1) {
|
} else if (Shift1) {
|
||||||
Opc = AMDGPU::S_PACK_LH_B32_B16;
|
Opc = AMDGPU::S_PACK_LH_B32_B16;
|
||||||
MI.getOperand(2).setReg(ShiftSrc1);
|
MI.getOperand(2).setReg(ShiftSrc1);
|
||||||
} else if (Shift0 && isZero(Src1, *MRI)) {
|
} else if (Shift0 && ConstSrc1 && ConstSrc1->Value == 0) {
|
||||||
// build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
|
// build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
|
||||||
auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
|
auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
|
||||||
.addReg(ShiftSrc0)
|
.addReg(ShiftSrc0)
|
||||||
|
|
|
@ -91,9 +91,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
|
||||||
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
|
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_movk_i32 s4, 0xffc0
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc0ffc0
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s4
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, s4
|
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
|
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
|
||||||
|
@ -113,8 +112,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
|
||||||
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo:
|
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, 0xffffffc0, 4
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4ffc0
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, s4
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_lo:
|
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_lo:
|
||||||
|
@ -133,8 +132,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
|
||||||
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi:
|
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, 4, 0xffffffc0
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc00004
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, s4
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
|
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
|
||||||
|
@ -152,13 +151,10 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
|
||||||
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
|
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
|
||||||
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
|
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_movk_i32 s1, 0xffc0
|
; GFX9-NEXT: s_lshr_b32 s1, s0, 16
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s1
|
; GFX9-NEXT: s_add_i32 s0, s0, 0xffc0ffc0
|
||||||
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
|
; GFX9-NEXT: s_add_i32 s1, s1, 0xffc0
|
||||||
; GFX9-NEXT: s_lshr_b32 s3, s1, 16
|
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
|
||||||
; GFX9-NEXT: s_add_i32 s0, s0, s1
|
|
||||||
; GFX9-NEXT: s_add_i32 s2, s2, s3
|
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
|
|
||||||
; GFX9-NEXT: ; return to shader part epilog
|
; GFX9-NEXT: ; return to shader part epilog
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
|
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
|
||||||
|
@ -182,12 +178,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
|
||||||
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
|
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
|
||||||
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo:
|
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s1, 0xffffffc0, 4
|
; GFX9-NEXT: s_lshr_b32 s1, s0, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
|
; GFX9-NEXT: s_add_i32 s0, s0, 0x4ffc0
|
||||||
; GFX9-NEXT: s_lshr_b32 s3, s1, 16
|
; GFX9-NEXT: s_add_i32 s1, s1, 4
|
||||||
; GFX9-NEXT: s_add_i32 s0, s0, s1
|
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
|
||||||
; GFX9-NEXT: s_add_i32 s2, s2, s3
|
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
|
|
||||||
; GFX9-NEXT: ; return to shader part epilog
|
; GFX9-NEXT: ; return to shader part epilog
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
|
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
|
||||||
|
@ -210,12 +204,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
|
||||||
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
|
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
|
||||||
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi:
|
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s1, 4, 0xffffffc0
|
; GFX9-NEXT: s_lshr_b32 s1, s0, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
|
; GFX9-NEXT: s_add_i32 s0, s0, 0xffc00004
|
||||||
; GFX9-NEXT: s_lshr_b32 s3, s1, 16
|
; GFX9-NEXT: s_add_i32 s1, s1, 0xffc0
|
||||||
; GFX9-NEXT: s_add_i32 s0, s0, s1
|
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
|
||||||
; GFX9-NEXT: s_add_i32 s2, s2, s3
|
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
|
|
||||||
; GFX9-NEXT: ; return to shader part epilog
|
; GFX9-NEXT: ; return to shader part epilog
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
|
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
|
||||||
|
|
|
@ -522,8 +522,7 @@ define <2 x i16> @v_ashr_v2i16_15(<2 x i16> %value) {
|
||||||
; GFX9-LABEL: v_ashr_v2i16_15:
|
; GFX9-LABEL: v_ashr_v2i16_15:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, 15, 15
|
; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
|
||||||
; GFX9-NEXT: v_pk_ashrrev_i16 v0, s4, v0
|
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = ashr <2 x i16> %value, <i16 15, i16 15>
|
%result = ashr <2 x i16> %value, <i16 15, i16 15>
|
||||||
ret <2 x i16> %result
|
ret <2 x i16> %result
|
||||||
|
|
|
@ -430,3 +430,273 @@ body: |
|
||||||
%5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4
|
%5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4
|
||||||
S_ENDPGM 0, implicit %5
|
S_ENDPGM 0, implicit %5
|
||||||
...
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_constant_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_constant_constant
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||||
|
%0:sgpr(s32) = G_CONSTANT i32 123
|
||||||
|
%1:sgpr(s32) = G_CONSTANT i32 456
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_constant_impdef
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_constant_impdef
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||||
|
%0:sgpr(s32) = G_CONSTANT i32 123
|
||||||
|
%1:sgpr(s32) = G_IMPLICIT_DEF
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_impdef_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_impdef_constant
|
||||||
|
; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[S_MOV_B32_]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s32) = G_IMPLICIT_DEF
|
||||||
|
%1:sgpr(s32) = G_CONSTANT i32 123
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_impdef_impdef
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_impdef_impdef
|
||||||
|
; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[DEF]]
|
||||||
|
%0:sgpr(s32) = G_IMPLICIT_DEF
|
||||||
|
%1:sgpr(s32) = G_IMPLICIT_DEF
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_zext_constant_zext_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_zext_constant_zext_constant
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||||
|
%0:sgpr(s16) = G_CONSTANT i16 123
|
||||||
|
%1:sgpr(s16) = G_CONSTANT i16 456
|
||||||
|
%2:sgpr(s32) = G_ZEXT %0
|
||||||
|
%3:sgpr(s32) = G_ZEXT %1
|
||||||
|
%4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3
|
||||||
|
S_ENDPGM 0, implicit %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_zext_impdef_zext_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_zext_impdef_zext_constant
|
||||||
|
; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
|
||||||
|
; GFX9: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[DEF]], 1048576, implicit-def $scc
|
||||||
|
; GFX9: [[S_BFE_U32_1:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_MOV_B32_]], 1048576, implicit-def $scc
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_BFE_U32_]], [[S_BFE_U32_1]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s16) = G_IMPLICIT_DEF
|
||||||
|
%1:sgpr(s16) = G_CONSTANT i16 123
|
||||||
|
%2:sgpr(s32) = G_ZEXT %0
|
||||||
|
%3:sgpr(s32) = G_ZEXT %1
|
||||||
|
%4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3
|
||||||
|
S_ENDPGM 0, implicit %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_sext_constant_sext_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_sext_constant_sext_constant
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294836208
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||||
|
%0:sgpr(s16) = G_CONSTANT i16 -16
|
||||||
|
%1:sgpr(s16) = G_CONSTANT i16 -3
|
||||||
|
%2:sgpr(s32) = G_SEXT %0
|
||||||
|
%3:sgpr(s32) = G_SEXT %1
|
||||||
|
%4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3
|
||||||
|
S_ENDPGM 0, implicit %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
|
||||||
|
; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 456
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[S_MOV_B32_1]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s16) = G_CONSTANT i16 123
|
||||||
|
%1:sgpr(s16) = G_CONSTANT i16 456
|
||||||
|
%2:sgpr(s32) = G_ANYEXT %0
|
||||||
|
%3:sgpr(s32) = G_ANYEXT %1
|
||||||
|
%4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3
|
||||||
|
S_ENDPGM 0, implicit %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_anyext_impdef_anyext_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_impdef_anyext_constant
|
||||||
|
; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[S_MOV_B32_]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s16) = G_IMPLICIT_DEF
|
||||||
|
%1:sgpr(s16) = G_CONSTANT i16 123
|
||||||
|
%2:sgpr(s32) = G_ANYEXT %0
|
||||||
|
%3:sgpr(s32) = G_ANYEXT %1
|
||||||
|
%4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %2, %3
|
||||||
|
S_ENDPGM 0, implicit %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_var_constant
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_var_constant
|
||||||
|
; GFX9: liveins: $sgpr0
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 456
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
|
%1:sgpr(s32) = G_CONSTANT i32 456
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_constant_var
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_constant_var
|
||||||
|
; GFX9: liveins: $sgpr0
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 456
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s32) = G_CONSTANT i32 456
|
||||||
|
%1:sgpr(s32) = COPY $sgpr0
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_var_0
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_var_0
|
||||||
|
; GFX9: liveins: $sgpr0
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
|
%1:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_build_vector_trunc_s_v2s16_0_var
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0
|
||||||
|
|
||||||
|
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_0_var
|
||||||
|
; GFX9: liveins: $sgpr0
|
||||||
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||||
|
%0:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
%1:sgpr(s32) = COPY $sgpr0
|
||||||
|
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||||
|
S_ENDPGM 0, implicit %2
|
||||||
|
...
|
||||||
|
|
|
@ -82,24 +82,21 @@ define i32 @v_sdot2_inline_literal_a(<2 x i16> %b, i32 %c) {
|
||||||
; GFX906-LABEL: v_sdot2_inline_literal_a:
|
; GFX906-LABEL: v_sdot2_inline_literal_a:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX906-NEXT: v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
|
||||||
; GFX906-NEXT: v_dot2_i32_i16 v0, s4, v0, v1
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_sdot2_inline_literal_a:
|
; GFX908-LABEL: v_sdot2_inline_literal_a:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX908-NEXT: v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
|
||||||
; GFX908-NEXT: v_dot2_i32_i16 v0, s4, v0, v1
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_sdot2_inline_literal_a:
|
; GFX10-LABEL: v_sdot2_inline_literal_a:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX10-NEXT: v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_i32_i16 v0, s4, v0, v1
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 4, i16 4>, <2 x i16> %b, i32 %c, i1 false)
|
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 4, i16 4>, <2 x i16> %b, i32 %c, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
@ -109,24 +106,21 @@ define i32 @v_sdot2_inline_literal_b(<2 x i16> %a, i32 %c) {
|
||||||
; GFX906-LABEL: v_sdot2_inline_literal_b:
|
; GFX906-LABEL: v_sdot2_inline_literal_b:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
|
||||||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, s4, v1
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_sdot2_inline_literal_b:
|
; GFX908-LABEL: v_sdot2_inline_literal_b:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX908-NEXT: v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
|
||||||
; GFX908-NEXT: v_dot2_i32_i16 v0, v0, s4, v1
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_sdot2_inline_literal_b:
|
; GFX10-LABEL: v_sdot2_inline_literal_b:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, s4, v1
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
@ -136,29 +130,21 @@ define i32 @v_sdot2_inline_literal_a_b(<2 x i16> %a, i32 %c) {
|
||||||
; GFX906-LABEL: v_sdot2_inline_literal_a_b:
|
; GFX906-LABEL: v_sdot2_inline_literal_a_b:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX906-NEXT: v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX906-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX906-NEXT: v_dot2_i32_i16 v0, s4, v0, v1
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_sdot2_inline_literal_a_b:
|
; GFX908-LABEL: v_sdot2_inline_literal_a_b:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX908-NEXT: v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX908-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX908-NEXT: v_dot2_i32_i16 v0, s4, v0, v1
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_sdot2_inline_literal_a_b:
|
; GFX10-LABEL: v_sdot2_inline_literal_a_b:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
; GFX10-NEXT: v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_i32_i16 v0, s4, s5, v1
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
@ -168,29 +154,21 @@ define i32 @v_sdot2_inline_literal_a_b_c() {
|
||||||
; GFX906-LABEL: v_sdot2_inline_literal_a_b_c:
|
; GFX906-LABEL: v_sdot2_inline_literal_a_b_c:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX906-NEXT: v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX906-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX906-NEXT: v_dot2_i32_i16 v0, s4, v0, 8
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_sdot2_inline_literal_a_b_c:
|
; GFX908-LABEL: v_sdot2_inline_literal_a_b_c:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX908-NEXT: v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX908-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX908-NEXT: v_dot2_i32_i16 v0, s4, v0, 8
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_sdot2_inline_literal_a_b_c:
|
; GFX10-LABEL: v_sdot2_inline_literal_a_b_c:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
; GFX10-NEXT: v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_i32_i16 v0, s4, s5, 8
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 8, i1 false)
|
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 8, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
|
|
@ -82,24 +82,21 @@ define i32 @v_udot2_inline_literal_a(<2 x i16> %b, i32 %c) {
|
||||||
; GFX906-LABEL: v_udot2_inline_literal_a:
|
; GFX906-LABEL: v_udot2_inline_literal_a:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX906-NEXT: v_dot2_u32_u16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
|
||||||
; GFX906-NEXT: v_dot2_u32_u16 v0, s4, v0, v1
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_udot2_inline_literal_a:
|
; GFX908-LABEL: v_udot2_inline_literal_a:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX908-NEXT: v_dot2_u32_u16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
|
||||||
; GFX908-NEXT: v_dot2_u32_u16 v0, s4, v0, v1
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_udot2_inline_literal_a:
|
; GFX10-LABEL: v_udot2_inline_literal_a:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX10-NEXT: v_dot2_u32_u16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_u32_u16 v0, s4, v0, v1
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 4, i16 4>, <2 x i16> %b, i32 %c, i1 false)
|
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 4, i16 4>, <2 x i16> %b, i32 %c, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
@ -109,24 +106,21 @@ define i32 @v_udot2_inline_literal_b(<2 x i16> %a, i32 %c) {
|
||||||
; GFX906-LABEL: v_udot2_inline_literal_b:
|
; GFX906-LABEL: v_udot2_inline_literal_b:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX906-NEXT: v_dot2_u32_u16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
|
||||||
; GFX906-NEXT: v_dot2_u32_u16 v0, v0, s4, v1
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_udot2_inline_literal_b:
|
; GFX908-LABEL: v_udot2_inline_literal_b:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX908-NEXT: v_dot2_u32_u16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
|
||||||
; GFX908-NEXT: v_dot2_u32_u16 v0, v0, s4, v1
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_udot2_inline_literal_b:
|
; GFX10-LABEL: v_udot2_inline_literal_b:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 4, 4
|
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, s4, v1
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
@ -136,29 +130,21 @@ define i32 @v_udot2_inline_literal_a_b(<2 x i16> %a, i32 %c) {
|
||||||
; GFX906-LABEL: v_udot2_inline_literal_a_b:
|
; GFX906-LABEL: v_udot2_inline_literal_a_b:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX906-NEXT: v_dot2_u32_u16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX906-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX906-NEXT: v_dot2_u32_u16 v0, s4, v0, v1
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_udot2_inline_literal_a_b:
|
; GFX908-LABEL: v_udot2_inline_literal_a_b:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX908-NEXT: v_dot2_u32_u16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX908-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX908-NEXT: v_dot2_u32_u16 v0, s4, v0, v1
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_udot2_inline_literal_a_b:
|
; GFX10-LABEL: v_udot2_inline_literal_a_b:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
; GFX10-NEXT: v_dot2_u32_u16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_u32_u16 v0, s4, s5, v1
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
@ -168,29 +154,21 @@ define i32 @v_udot2_inline_literal_a_b_c() {
|
||||||
; GFX906-LABEL: v_udot2_inline_literal_a_b_c:
|
; GFX906-LABEL: v_udot2_inline_literal_a_b_c:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX906-NEXT: v_dot2_u32_u16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX906-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX906-NEXT: v_dot2_u32_u16 v0, s4, v0, 8
|
|
||||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX908-LABEL: v_udot2_inline_literal_a_b_c:
|
; GFX908-LABEL: v_udot2_inline_literal_a_b_c:
|
||||||
; GFX908: ; %bb.0:
|
; GFX908: ; %bb.0:
|
||||||
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
; GFX908-NEXT: v_dot2_u32_u16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
|
||||||
; GFX908-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
|
||||||
; GFX908-NEXT: v_mov_b32_e32 v0, s5
|
|
||||||
; GFX908-NEXT: v_dot2_u32_u16 v0, s4, v0, 8
|
|
||||||
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: v_udot2_inline_literal_a_b_c:
|
; GFX10-LABEL: v_udot2_inline_literal_a_b_c:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, 8, 8
|
; GFX10-NEXT: v_dot2_u32_u16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s5, 4, 4
|
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_dot2_u32_u16 v0, s4, s5, 8
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 8, i1 false)
|
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 8, i1 false)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
|
|
|
@ -533,8 +533,7 @@ define <2 x i16> @v_lshr_v2i16_15(<2 x i16> %value) {
|
||||||
; GFX9-LABEL: v_lshr_v2i16_15:
|
; GFX9-LABEL: v_lshr_v2i16_15:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, 15, 15
|
; GFX9-NEXT: v_pk_lshrrev_b16 v0, 15, v0 op_sel_hi:[0,1]
|
||||||
; GFX9-NEXT: v_pk_lshrrev_b16 v0, s4, v0
|
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = lshr <2 x i16> %value, <i16 15, i16 15>
|
%result = lshr <2 x i16> %value, <i16 15, i16 15>
|
||||||
ret <2 x i16> %result
|
ret <2 x i16> %result
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -529,8 +529,7 @@ define <2 x i16> @v_shl_v2i16_15(<2 x i16> %value) {
|
||||||
; GFX9-LABEL: v_shl_v2i16_15:
|
; GFX9-LABEL: v_shl_v2i16_15:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, 15, 15
|
; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
|
||||||
; GFX9-NEXT: v_pk_lshlrev_b16 v0, s4, v0
|
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = shl <2 x i16> %value, <i16 15, i16 15>
|
%result = shl <2 x i16> %value, <i16 15, i16 15>
|
||||||
ret <2 x i16> %result
|
ret <2 x i16> %result
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2371,8 +2371,7 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
|
||||||
; GFX9-LABEL: v_uaddsat_v2i16:
|
; GFX9-LABEL: v_uaddsat_v2i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX9-NEXT: v_xor_b32_e32 v2, -1, v0
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v2, s4, v0
|
|
||||||
; GFX9-NEXT: v_pk_min_u16 v1, v2, v1
|
; GFX9-NEXT: v_pk_min_u16 v1, v2, v1
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
@ -2381,9 +2380,8 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX10-NEXT: v_xor_b32_e32 v2, -1, v0
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v2, s4, v0
|
|
||||||
; GFX10-NEXT: v_pk_min_u16 v1, v2, v1
|
; GFX10-NEXT: v_pk_min_u16 v1, v2, v1
|
||||||
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
|
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
@ -2439,8 +2437,7 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
|
||||||
;
|
;
|
||||||
; GFX9-LABEL: s_uaddsat_v2i16:
|
; GFX9-LABEL: s_uaddsat_v2i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s2, -1, -1
|
; GFX9-NEXT: s_xor_b32 s2, s0, -1
|
||||||
; GFX9-NEXT: s_xor_b32 s2, s0, s2
|
|
||||||
; GFX9-NEXT: s_mov_b32 s4, 0xffff
|
; GFX9-NEXT: s_mov_b32 s4, 0xffff
|
||||||
; GFX9-NEXT: s_lshr_b32 s3, s2, 16
|
; GFX9-NEXT: s_lshr_b32 s3, s2, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s5, s1, 16
|
; GFX9-NEXT: s_lshr_b32 s5, s1, 16
|
||||||
|
@ -2460,15 +2457,14 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: s_uaddsat_v2i16:
|
; GFX10-LABEL: s_uaddsat_v2i16:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s2, -1, -1
|
; GFX10-NEXT: s_xor_b32 s2, s0, -1
|
||||||
; GFX10-NEXT: s_mov_b32 s3, 0xffff
|
; GFX10-NEXT: s_mov_b32 s3, 0xffff
|
||||||
; GFX10-NEXT: s_xor_b32 s2, s0, s2
|
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
|
||||||
; GFX10-NEXT: s_lshr_b32 s4, s2, 16
|
; GFX10-NEXT: s_lshr_b32 s4, s2, 16
|
||||||
; GFX10-NEXT: s_and_b32 s2, s2, s3
|
; GFX10-NEXT: s_and_b32 s2, s2, s3
|
||||||
; GFX10-NEXT: s_and_b32 s3, s1, s3
|
; GFX10-NEXT: s_and_b32 s3, s1, s3
|
||||||
; GFX10-NEXT: s_lshr_b32 s1, s1, 16
|
; GFX10-NEXT: s_lshr_b32 s1, s1, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s2, s3
|
; GFX10-NEXT: s_cmp_lt_u32 s2, s3
|
||||||
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: s_cselect_b32 s2, s2, s3
|
; GFX10-NEXT: s_cselect_b32 s2, s2, s3
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s4, s1
|
; GFX10-NEXT: s_cmp_lt_u32 s4, s1
|
||||||
; GFX10-NEXT: s_cselect_b32 s1, s4, s1
|
; GFX10-NEXT: s_cselect_b32 s1, s4, s1
|
||||||
|
@ -2522,17 +2518,15 @@ define amdgpu_ps float @uaddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
|
||||||
;
|
;
|
||||||
; GFX9-LABEL: uaddsat_v2i16_sv:
|
; GFX9-LABEL: uaddsat_v2i16_sv:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s1, -1, -1
|
; GFX9-NEXT: s_xor_b32 s1, s0, -1
|
||||||
; GFX9-NEXT: s_xor_b32 s1, s0, s1
|
|
||||||
; GFX9-NEXT: v_pk_min_u16 v0, s1, v0
|
; GFX9-NEXT: v_pk_min_u16 v0, s1, v0
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, s0, v0
|
; GFX9-NEXT: v_pk_add_u16 v0, s0, v0
|
||||||
; GFX9-NEXT: ; return to shader part epilog
|
; GFX9-NEXT: ; return to shader part epilog
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: uaddsat_v2i16_sv:
|
; GFX10-LABEL: uaddsat_v2i16_sv:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s1, -1, -1
|
; GFX10-NEXT: s_xor_b32 s1, s0, -1
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: s_xor_b32 s1, s0, s1
|
|
||||||
; GFX10-NEXT: v_pk_min_u16 v0, s1, v0
|
; GFX10-NEXT: v_pk_min_u16 v0, s1, v0
|
||||||
; GFX10-NEXT: v_pk_add_u16 v0, s0, v0
|
; GFX10-NEXT: v_pk_add_u16 v0, s0, v0
|
||||||
; GFX10-NEXT: ; return to shader part epilog
|
; GFX10-NEXT: ; return to shader part epilog
|
||||||
|
@ -2578,17 +2572,15 @@ define amdgpu_ps float @uaddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
|
||||||
;
|
;
|
||||||
; GFX9-LABEL: uaddsat_v2i16_vs:
|
; GFX9-LABEL: uaddsat_v2i16_vs:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s1, -1, -1
|
; GFX9-NEXT: v_xor_b32_e32 v1, -1, v0
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v1, s1, v0
|
|
||||||
; GFX9-NEXT: v_pk_min_u16 v1, v1, s0
|
; GFX9-NEXT: v_pk_min_u16 v1, v1, s0
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
|
||||||
; GFX9-NEXT: ; return to shader part epilog
|
; GFX9-NEXT: ; return to shader part epilog
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: uaddsat_v2i16_vs:
|
; GFX10-LABEL: uaddsat_v2i16_vs:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s1, -1, -1
|
; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v1, s1, v0
|
|
||||||
; GFX10-NEXT: v_pk_min_u16 v1, v1, s0
|
; GFX10-NEXT: v_pk_min_u16 v1, v1, s0
|
||||||
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
|
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
|
||||||
; GFX10-NEXT: ; return to shader part epilog
|
; GFX10-NEXT: ; return to shader part epilog
|
||||||
|
@ -2671,11 +2663,10 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||||
; GFX9-LABEL: v_uaddsat_v4i16:
|
; GFX9-LABEL: v_uaddsat_v4i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX9-NEXT: v_xor_b32_e32 v4, -1, v0
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v0
|
|
||||||
; GFX9-NEXT: v_pk_min_u16 v2, v4, v2
|
; GFX9-NEXT: v_pk_min_u16 v2, v4, v2
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v2
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v2
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v2, s4, v1
|
; GFX9-NEXT: v_xor_b32_e32 v2, -1, v1
|
||||||
; GFX9-NEXT: v_pk_min_u16 v2, v2, v3
|
; GFX9-NEXT: v_pk_min_u16 v2, v2, v3
|
||||||
; GFX9-NEXT: v_pk_add_u16 v1, v1, v2
|
; GFX9-NEXT: v_pk_add_u16 v1, v1, v2
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
@ -2684,10 +2675,9 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX10-NEXT: v_xor_b32_e32 v4, -1, v0
|
||||||
|
; GFX10-NEXT: v_xor_b32_e32 v5, -1, v1
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v4, s4, v0
|
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v5, s4, v1
|
|
||||||
; GFX10-NEXT: v_pk_min_u16 v2, v4, v2
|
; GFX10-NEXT: v_pk_min_u16 v2, v4, v2
|
||||||
; GFX10-NEXT: v_pk_min_u16 v3, v5, v3
|
; GFX10-NEXT: v_pk_min_u16 v3, v5, v3
|
||||||
; GFX10-NEXT: v_pk_add_u16 v0, v0, v2
|
; GFX10-NEXT: v_pk_add_u16 v0, v0, v2
|
||||||
|
@ -2782,28 +2772,27 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
|
||||||
;
|
;
|
||||||
; GFX9-LABEL: s_uaddsat_v4i16:
|
; GFX9-LABEL: s_uaddsat_v4i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX9-NEXT: s_xor_b32 s4, s0, -1
|
||||||
; GFX9-NEXT: s_xor_b32 s5, s0, s4
|
; GFX9-NEXT: s_mov_b32 s6, 0xffff
|
||||||
; GFX9-NEXT: s_mov_b32 s7, 0xffff
|
; GFX9-NEXT: s_lshr_b32 s5, s4, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s6, s5, 16
|
; GFX9-NEXT: s_lshr_b32 s7, s2, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s8, s2, 16
|
; GFX9-NEXT: s_and_b32 s4, s4, s6
|
||||||
; GFX9-NEXT: s_and_b32 s5, s5, s7
|
; GFX9-NEXT: s_and_b32 s2, s2, s6
|
||||||
; GFX9-NEXT: s_and_b32 s2, s2, s7
|
; GFX9-NEXT: s_cmp_lt_u32 s4, s2
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s5, s2
|
; GFX9-NEXT: s_cselect_b32 s2, s4, s2
|
||||||
; GFX9-NEXT: s_cselect_b32 s2, s5, s2
|
; GFX9-NEXT: s_cmp_lt_u32 s5, s7
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s6, s8
|
; GFX9-NEXT: s_cselect_b32 s4, s5, s7
|
||||||
; GFX9-NEXT: s_cselect_b32 s5, s6, s8
|
; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s5
|
; GFX9-NEXT: s_lshr_b32 s4, s0, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s5, s0, 16
|
; GFX9-NEXT: s_lshr_b32 s5, s2, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s6, s2, 16
|
|
||||||
; GFX9-NEXT: s_add_i32 s0, s0, s2
|
; GFX9-NEXT: s_add_i32 s0, s0, s2
|
||||||
; GFX9-NEXT: s_xor_b32 s2, s1, s4
|
; GFX9-NEXT: s_add_i32 s4, s4, s5
|
||||||
; GFX9-NEXT: s_add_i32 s5, s5, s6
|
; GFX9-NEXT: s_xor_b32 s2, s1, -1
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s5
|
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
|
||||||
; GFX9-NEXT: s_lshr_b32 s4, s2, 16
|
; GFX9-NEXT: s_lshr_b32 s4, s2, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s5, s3, 16
|
; GFX9-NEXT: s_lshr_b32 s5, s3, 16
|
||||||
; GFX9-NEXT: s_and_b32 s2, s2, s7
|
; GFX9-NEXT: s_and_b32 s2, s2, s6
|
||||||
; GFX9-NEXT: s_and_b32 s3, s3, s7
|
; GFX9-NEXT: s_and_b32 s3, s3, s6
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s2, s3
|
; GFX9-NEXT: s_cmp_lt_u32 s2, s3
|
||||||
; GFX9-NEXT: s_cselect_b32 s2, s2, s3
|
; GFX9-NEXT: s_cselect_b32 s2, s2, s3
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s4, s5
|
; GFX9-NEXT: s_cmp_lt_u32 s4, s5
|
||||||
|
@ -2818,38 +2807,37 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: s_uaddsat_v4i16:
|
; GFX10-LABEL: s_uaddsat_v4i16:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX10-NEXT: s_xor_b32 s4, s0, -1
|
||||||
; GFX10-NEXT: s_mov_b32 s6, 0xffff
|
; GFX10-NEXT: s_mov_b32 s5, 0xffff
|
||||||
; GFX10-NEXT: s_xor_b32 s5, s0, s4
|
; GFX10-NEXT: s_lshr_b32 s6, s4, 16
|
||||||
; GFX10-NEXT: s_and_b32 s8, s2, s6
|
; GFX10-NEXT: s_and_b32 s7, s2, s5
|
||||||
; GFX10-NEXT: s_lshr_b32 s7, s5, 16
|
; GFX10-NEXT: s_and_b32 s4, s4, s5
|
||||||
; GFX10-NEXT: s_and_b32 s5, s5, s6
|
|
||||||
; GFX10-NEXT: s_lshr_b32 s2, s2, 16
|
; GFX10-NEXT: s_lshr_b32 s2, s2, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s5, s8
|
; GFX10-NEXT: s_cmp_lt_u32 s4, s7
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: s_cselect_b32 s5, s5, s8
|
; GFX10-NEXT: s_cselect_b32 s4, s4, s7
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s7, s2
|
; GFX10-NEXT: s_cmp_lt_u32 s6, s2
|
||||||
; GFX10-NEXT: s_cselect_b32 s2, s7, s2
|
; GFX10-NEXT: s_cselect_b32 s2, s6, s2
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s2, s5, s2
|
; GFX10-NEXT: s_pack_ll_b32_b16 s2, s4, s2
|
||||||
; GFX10-NEXT: s_lshr_b32 s5, s0, 16
|
; GFX10-NEXT: s_lshr_b32 s4, s0, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s7, s2, 16
|
; GFX10-NEXT: s_lshr_b32 s6, s2, 16
|
||||||
; GFX10-NEXT: s_add_i32 s0, s0, s2
|
; GFX10-NEXT: s_add_i32 s0, s0, s2
|
||||||
; GFX10-NEXT: s_xor_b32 s2, s1, s4
|
; GFX10-NEXT: s_xor_b32 s2, s1, -1
|
||||||
; GFX10-NEXT: s_add_i32 s5, s5, s7
|
; GFX10-NEXT: s_add_i32 s4, s4, s6
|
||||||
; GFX10-NEXT: s_lshr_b32 s4, s2, 16
|
; GFX10-NEXT: s_lshr_b32 s6, s2, 16
|
||||||
; GFX10-NEXT: s_and_b32 s2, s2, s6
|
; GFX10-NEXT: s_and_b32 s2, s2, s5
|
||||||
; GFX10-NEXT: s_and_b32 s6, s3, s6
|
; GFX10-NEXT: s_and_b32 s5, s3, s5
|
||||||
; GFX10-NEXT: s_lshr_b32 s3, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s3, s3, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s2, s6
|
; GFX10-NEXT: s_cmp_lt_u32 s2, s5
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s5
|
; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
|
||||||
; GFX10-NEXT: s_cselect_b32 s2, s2, s6
|
; GFX10-NEXT: s_cselect_b32 s2, s2, s5
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s4, s3
|
; GFX10-NEXT: s_cmp_lt_u32 s6, s3
|
||||||
; GFX10-NEXT: s_cselect_b32 s3, s4, s3
|
; GFX10-NEXT: s_cselect_b32 s3, s6, s3
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s3
|
; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s3
|
||||||
; GFX10-NEXT: s_lshr_b32 s3, s1, 16
|
; GFX10-NEXT: s_lshr_b32 s3, s1, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s4, s2, 16
|
; GFX10-NEXT: s_lshr_b32 s5, s2, 16
|
||||||
; GFX10-NEXT: s_add_i32 s1, s1, s2
|
; GFX10-NEXT: s_add_i32 s1, s1, s2
|
||||||
; GFX10-NEXT: s_add_i32 s3, s3, s4
|
; GFX10-NEXT: s_add_i32 s3, s3, s5
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3
|
; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3
|
||||||
; GFX10-NEXT: ; return to shader part epilog
|
; GFX10-NEXT: ; return to shader part epilog
|
||||||
%result = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
%result = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||||
|
@ -2955,14 +2943,13 @@ define <3 x float> @v_uaddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
|
||||||
; GFX9-LABEL: v_uaddsat_v6i16:
|
; GFX9-LABEL: v_uaddsat_v6i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX9-NEXT: v_xor_b32_e32 v6, -1, v0
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v6, s4, v0
|
|
||||||
; GFX9-NEXT: v_pk_min_u16 v3, v6, v3
|
; GFX9-NEXT: v_pk_min_u16 v3, v6, v3
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v3
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v3
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v3, s4, v1
|
; GFX9-NEXT: v_xor_b32_e32 v3, -1, v1
|
||||||
; GFX9-NEXT: v_pk_min_u16 v3, v3, v4
|
; GFX9-NEXT: v_pk_min_u16 v3, v3, v4
|
||||||
; GFX9-NEXT: v_pk_add_u16 v1, v1, v3
|
; GFX9-NEXT: v_pk_add_u16 v1, v1, v3
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v3, s4, v2
|
; GFX9-NEXT: v_xor_b32_e32 v3, -1, v2
|
||||||
; GFX9-NEXT: v_pk_min_u16 v3, v3, v5
|
; GFX9-NEXT: v_pk_min_u16 v3, v3, v5
|
||||||
; GFX9-NEXT: v_pk_add_u16 v2, v2, v3
|
; GFX9-NEXT: v_pk_add_u16 v2, v2, v3
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
@ -2971,11 +2958,10 @@ define <3 x float> @v_uaddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX10-NEXT: v_xor_b32_e32 v6, -1, v0
|
||||||
|
; GFX10-NEXT: v_xor_b32_e32 v7, -1, v1
|
||||||
|
; GFX10-NEXT: v_xor_b32_e32 v8, -1, v2
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v6, s4, v0
|
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v7, s4, v1
|
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v8, s4, v2
|
|
||||||
; GFX10-NEXT: v_pk_min_u16 v3, v6, v3
|
; GFX10-NEXT: v_pk_min_u16 v3, v6, v3
|
||||||
; GFX10-NEXT: v_pk_min_u16 v4, v7, v4
|
; GFX10-NEXT: v_pk_min_u16 v4, v7, v4
|
||||||
; GFX10-NEXT: v_pk_min_u16 v5, v8, v5
|
; GFX10-NEXT: v_pk_min_u16 v5, v8, v5
|
||||||
|
@ -3108,43 +3094,42 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
|
||||||
;
|
;
|
||||||
; GFX9-LABEL: s_uaddsat_v6i16:
|
; GFX9-LABEL: s_uaddsat_v6i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s6, -1, -1
|
; GFX9-NEXT: s_xor_b32 s6, s0, -1
|
||||||
; GFX9-NEXT: s_xor_b32 s7, s0, s6
|
; GFX9-NEXT: s_mov_b32 s8, 0xffff
|
||||||
; GFX9-NEXT: s_mov_b32 s9, 0xffff
|
; GFX9-NEXT: s_lshr_b32 s7, s6, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s8, s7, 16
|
; GFX9-NEXT: s_lshr_b32 s9, s3, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s10, s3, 16
|
; GFX9-NEXT: s_and_b32 s6, s6, s8
|
||||||
; GFX9-NEXT: s_and_b32 s7, s7, s9
|
; GFX9-NEXT: s_and_b32 s3, s3, s8
|
||||||
; GFX9-NEXT: s_and_b32 s3, s3, s9
|
; GFX9-NEXT: s_cmp_lt_u32 s6, s3
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s7, s3
|
; GFX9-NEXT: s_cselect_b32 s3, s6, s3
|
||||||
; GFX9-NEXT: s_cselect_b32 s3, s7, s3
|
; GFX9-NEXT: s_cmp_lt_u32 s7, s9
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s8, s10
|
; GFX9-NEXT: s_cselect_b32 s6, s7, s9
|
||||||
; GFX9-NEXT: s_cselect_b32 s7, s8, s10
|
; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s6
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s7
|
; GFX9-NEXT: s_lshr_b32 s6, s0, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s7, s0, 16
|
|
||||||
; GFX9-NEXT: s_lshr_b32 s8, s3, 16
|
|
||||||
; GFX9-NEXT: s_add_i32 s0, s0, s3
|
|
||||||
; GFX9-NEXT: s_add_i32 s7, s7, s8
|
|
||||||
; GFX9-NEXT: s_xor_b32 s3, s1, s6
|
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7
|
|
||||||
; GFX9-NEXT: s_lshr_b32 s7, s3, 16
|
; GFX9-NEXT: s_lshr_b32 s7, s3, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s8, s4, 16
|
; GFX9-NEXT: s_add_i32 s0, s0, s3
|
||||||
; GFX9-NEXT: s_and_b32 s3, s3, s9
|
; GFX9-NEXT: s_add_i32 s6, s6, s7
|
||||||
; GFX9-NEXT: s_and_b32 s4, s4, s9
|
; GFX9-NEXT: s_xor_b32 s3, s1, -1
|
||||||
|
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6
|
||||||
|
; GFX9-NEXT: s_lshr_b32 s6, s3, 16
|
||||||
|
; GFX9-NEXT: s_lshr_b32 s7, s4, 16
|
||||||
|
; GFX9-NEXT: s_and_b32 s3, s3, s8
|
||||||
|
; GFX9-NEXT: s_and_b32 s4, s4, s8
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s3, s4
|
; GFX9-NEXT: s_cmp_lt_u32 s3, s4
|
||||||
; GFX9-NEXT: s_cselect_b32 s3, s3, s4
|
; GFX9-NEXT: s_cselect_b32 s3, s3, s4
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s7, s8
|
; GFX9-NEXT: s_cmp_lt_u32 s6, s7
|
||||||
; GFX9-NEXT: s_cselect_b32 s4, s7, s8
|
; GFX9-NEXT: s_cselect_b32 s4, s6, s7
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
|
; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
|
||||||
; GFX9-NEXT: s_lshr_b32 s4, s1, 16
|
; GFX9-NEXT: s_lshr_b32 s4, s1, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s7, s3, 16
|
; GFX9-NEXT: s_lshr_b32 s6, s3, 16
|
||||||
; GFX9-NEXT: s_add_i32 s1, s1, s3
|
; GFX9-NEXT: s_add_i32 s1, s1, s3
|
||||||
; GFX9-NEXT: s_add_i32 s4, s4, s7
|
; GFX9-NEXT: s_add_i32 s4, s4, s6
|
||||||
; GFX9-NEXT: s_xor_b32 s3, s2, s6
|
; GFX9-NEXT: s_xor_b32 s3, s2, -1
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
|
; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
|
||||||
; GFX9-NEXT: s_lshr_b32 s4, s3, 16
|
; GFX9-NEXT: s_lshr_b32 s4, s3, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s6, s5, 16
|
; GFX9-NEXT: s_lshr_b32 s6, s5, 16
|
||||||
; GFX9-NEXT: s_and_b32 s3, s3, s9
|
; GFX9-NEXT: s_and_b32 s3, s3, s8
|
||||||
; GFX9-NEXT: s_and_b32 s5, s5, s9
|
; GFX9-NEXT: s_and_b32 s5, s5, s8
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s3, s5
|
; GFX9-NEXT: s_cmp_lt_u32 s3, s5
|
||||||
; GFX9-NEXT: s_cselect_b32 s3, s3, s5
|
; GFX9-NEXT: s_cselect_b32 s3, s3, s5
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s4, s6
|
; GFX9-NEXT: s_cmp_lt_u32 s4, s6
|
||||||
|
@ -3159,48 +3144,47 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: s_uaddsat_v6i16:
|
; GFX10-LABEL: s_uaddsat_v6i16:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s6, -1, -1
|
; GFX10-NEXT: s_xor_b32 s6, s0, -1
|
||||||
; GFX10-NEXT: s_mov_b32 s8, 0xffff
|
; GFX10-NEXT: s_mov_b32 s7, 0xffff
|
||||||
; GFX10-NEXT: s_xor_b32 s7, s0, s6
|
; GFX10-NEXT: s_lshr_b32 s8, s6, 16
|
||||||
; GFX10-NEXT: s_and_b32 s10, s3, s8
|
; GFX10-NEXT: s_and_b32 s9, s3, s7
|
||||||
; GFX10-NEXT: s_lshr_b32 s9, s7, 16
|
; GFX10-NEXT: s_and_b32 s6, s6, s7
|
||||||
; GFX10-NEXT: s_and_b32 s7, s7, s8
|
|
||||||
; GFX10-NEXT: s_lshr_b32 s3, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s3, s3, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s7, s10
|
; GFX10-NEXT: s_cmp_lt_u32 s6, s9
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: s_cselect_b32 s7, s7, s10
|
; GFX10-NEXT: s_cselect_b32 s6, s6, s9
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s9, s3
|
; GFX10-NEXT: s_cmp_lt_u32 s8, s3
|
||||||
; GFX10-NEXT: s_cselect_b32 s3, s9, s3
|
; GFX10-NEXT: s_cselect_b32 s3, s8, s3
|
||||||
; GFX10-NEXT: s_and_b32 s10, s4, s8
|
; GFX10-NEXT: s_and_b32 s9, s4, s7
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s3, s7, s3
|
; GFX10-NEXT: s_pack_ll_b32_b16 s3, s6, s3
|
||||||
; GFX10-NEXT: s_lshr_b32 s7, s0, 16
|
; GFX10-NEXT: s_lshr_b32 s6, s0, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s9, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s8, s3, 16
|
||||||
; GFX10-NEXT: s_add_i32 s0, s0, s3
|
; GFX10-NEXT: s_add_i32 s0, s0, s3
|
||||||
; GFX10-NEXT: s_xor_b32 s3, s1, s6
|
; GFX10-NEXT: s_xor_b32 s3, s1, -1
|
||||||
; GFX10-NEXT: s_add_i32 s7, s7, s9
|
; GFX10-NEXT: s_add_i32 s6, s6, s8
|
||||||
; GFX10-NEXT: s_lshr_b32 s9, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s8, s3, 16
|
||||||
; GFX10-NEXT: s_and_b32 s3, s3, s8
|
; GFX10-NEXT: s_and_b32 s3, s3, s7
|
||||||
; GFX10-NEXT: s_lshr_b32 s4, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s4, s4, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s3, s10
|
; GFX10-NEXT: s_cmp_lt_u32 s3, s9
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s7
|
; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6
|
||||||
; GFX10-NEXT: s_cselect_b32 s3, s3, s10
|
; GFX10-NEXT: s_cselect_b32 s3, s3, s9
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s9, s4
|
; GFX10-NEXT: s_cmp_lt_u32 s8, s4
|
||||||
; GFX10-NEXT: s_cselect_b32 s4, s9, s4
|
; GFX10-NEXT: s_cselect_b32 s4, s8, s4
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4
|
; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4
|
||||||
; GFX10-NEXT: s_lshr_b32 s4, s1, 16
|
; GFX10-NEXT: s_lshr_b32 s4, s1, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s9, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s8, s3, 16
|
||||||
; GFX10-NEXT: s_add_i32 s1, s1, s3
|
; GFX10-NEXT: s_add_i32 s1, s1, s3
|
||||||
; GFX10-NEXT: s_xor_b32 s3, s2, s6
|
; GFX10-NEXT: s_xor_b32 s3, s2, -1
|
||||||
; GFX10-NEXT: s_add_i32 s4, s4, s9
|
; GFX10-NEXT: s_add_i32 s4, s4, s8
|
||||||
; GFX10-NEXT: s_lshr_b32 s6, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s8, s3, 16
|
||||||
; GFX10-NEXT: s_and_b32 s3, s3, s8
|
; GFX10-NEXT: s_and_b32 s3, s3, s7
|
||||||
; GFX10-NEXT: s_and_b32 s8, s5, s8
|
; GFX10-NEXT: s_and_b32 s7, s5, s7
|
||||||
; GFX10-NEXT: s_lshr_b32 s5, s5, 16
|
; GFX10-NEXT: s_lshr_b32 s5, s5, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s3, s8
|
; GFX10-NEXT: s_cmp_lt_u32 s3, s7
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
|
; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
|
||||||
; GFX10-NEXT: s_cselect_b32 s3, s3, s8
|
; GFX10-NEXT: s_cselect_b32 s3, s3, s7
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s6, s5
|
; GFX10-NEXT: s_cmp_lt_u32 s8, s5
|
||||||
; GFX10-NEXT: s_cselect_b32 s5, s6, s5
|
; GFX10-NEXT: s_cselect_b32 s5, s8, s5
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5
|
; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5
|
||||||
; GFX10-NEXT: s_lshr_b32 s5, s2, 16
|
; GFX10-NEXT: s_lshr_b32 s5, s2, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s6, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s6, s3, 16
|
||||||
|
@ -3324,17 +3308,16 @@ define <4 x float> @v_uaddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||||
; GFX9-LABEL: v_uaddsat_v8i16:
|
; GFX9-LABEL: v_uaddsat_v8i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX9-NEXT: v_xor_b32_e32 v8, -1, v0
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v8, s4, v0
|
|
||||||
; GFX9-NEXT: v_pk_min_u16 v4, v8, v4
|
; GFX9-NEXT: v_pk_min_u16 v4, v8, v4
|
||||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, v4
|
; GFX9-NEXT: v_pk_add_u16 v0, v0, v4
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v1
|
; GFX9-NEXT: v_xor_b32_e32 v4, -1, v1
|
||||||
; GFX9-NEXT: v_pk_min_u16 v4, v4, v5
|
; GFX9-NEXT: v_pk_min_u16 v4, v4, v5
|
||||||
; GFX9-NEXT: v_pk_add_u16 v1, v1, v4
|
; GFX9-NEXT: v_pk_add_u16 v1, v1, v4
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v2
|
; GFX9-NEXT: v_xor_b32_e32 v4, -1, v2
|
||||||
; GFX9-NEXT: v_pk_min_u16 v4, v4, v6
|
; GFX9-NEXT: v_pk_min_u16 v4, v4, v6
|
||||||
; GFX9-NEXT: v_pk_add_u16 v2, v2, v4
|
; GFX9-NEXT: v_pk_add_u16 v2, v2, v4
|
||||||
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v3
|
; GFX9-NEXT: v_xor_b32_e32 v4, -1, v3
|
||||||
; GFX9-NEXT: v_pk_min_u16 v4, v4, v7
|
; GFX9-NEXT: v_pk_min_u16 v4, v4, v7
|
||||||
; GFX9-NEXT: v_pk_add_u16 v3, v3, v4
|
; GFX9-NEXT: v_pk_add_u16 v3, v3, v4
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
@ -3343,12 +3326,11 @@ define <4 x float> @v_uaddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX10-NEXT: v_xor_b32_e32 v15, -1, v0
|
||||||
|
; GFX10-NEXT: v_xor_b32_e32 v19, -1, v1
|
||||||
|
; GFX10-NEXT: v_xor_b32_e32 v23, -1, v2
|
||||||
|
; GFX10-NEXT: v_xor_b32_e32 v10, -1, v3
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v15, s4, v0
|
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v19, s4, v1
|
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v23, s4, v2
|
|
||||||
; GFX10-NEXT: v_xor_b32_e32 v10, s4, v3
|
|
||||||
; GFX10-NEXT: v_pk_min_u16 v11, v15, v4
|
; GFX10-NEXT: v_pk_min_u16 v11, v15, v4
|
||||||
; GFX10-NEXT: v_pk_min_u16 v15, v19, v5
|
; GFX10-NEXT: v_pk_min_u16 v15, v19, v5
|
||||||
; GFX10-NEXT: v_pk_min_u16 v19, v23, v6
|
; GFX10-NEXT: v_pk_min_u16 v19, v23, v6
|
||||||
|
@ -3519,58 +3501,57 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
|
||||||
;
|
;
|
||||||
; GFX9-LABEL: s_uaddsat_v8i16:
|
; GFX9-LABEL: s_uaddsat_v8i16:
|
||||||
; GFX9: ; %bb.0:
|
; GFX9: ; %bb.0:
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s8, -1, -1
|
; GFX9-NEXT: s_xor_b32 s8, s0, -1
|
||||||
; GFX9-NEXT: s_xor_b32 s9, s0, s8
|
; GFX9-NEXT: s_mov_b32 s10, 0xffff
|
||||||
; GFX9-NEXT: s_mov_b32 s11, 0xffff
|
; GFX9-NEXT: s_lshr_b32 s9, s8, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s10, s9, 16
|
; GFX9-NEXT: s_lshr_b32 s11, s4, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s12, s4, 16
|
; GFX9-NEXT: s_and_b32 s8, s8, s10
|
||||||
; GFX9-NEXT: s_and_b32 s9, s9, s11
|
; GFX9-NEXT: s_and_b32 s4, s4, s10
|
||||||
; GFX9-NEXT: s_and_b32 s4, s4, s11
|
; GFX9-NEXT: s_cmp_lt_u32 s8, s4
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s9, s4
|
; GFX9-NEXT: s_cselect_b32 s4, s8, s4
|
||||||
; GFX9-NEXT: s_cselect_b32 s4, s9, s4
|
; GFX9-NEXT: s_cmp_lt_u32 s9, s11
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s10, s12
|
; GFX9-NEXT: s_cselect_b32 s8, s9, s11
|
||||||
; GFX9-NEXT: s_cselect_b32 s9, s10, s12
|
; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s8
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s9
|
; GFX9-NEXT: s_lshr_b32 s8, s0, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s9, s0, 16
|
|
||||||
; GFX9-NEXT: s_lshr_b32 s10, s4, 16
|
|
||||||
; GFX9-NEXT: s_add_i32 s0, s0, s4
|
|
||||||
; GFX9-NEXT: s_add_i32 s9, s9, s10
|
|
||||||
; GFX9-NEXT: s_xor_b32 s4, s1, s8
|
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s9
|
|
||||||
; GFX9-NEXT: s_lshr_b32 s9, s4, 16
|
; GFX9-NEXT: s_lshr_b32 s9, s4, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s10, s5, 16
|
; GFX9-NEXT: s_add_i32 s0, s0, s4
|
||||||
; GFX9-NEXT: s_and_b32 s4, s4, s11
|
; GFX9-NEXT: s_add_i32 s8, s8, s9
|
||||||
; GFX9-NEXT: s_and_b32 s5, s5, s11
|
; GFX9-NEXT: s_xor_b32 s4, s1, -1
|
||||||
|
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s8
|
||||||
|
; GFX9-NEXT: s_lshr_b32 s8, s4, 16
|
||||||
|
; GFX9-NEXT: s_lshr_b32 s9, s5, 16
|
||||||
|
; GFX9-NEXT: s_and_b32 s4, s4, s10
|
||||||
|
; GFX9-NEXT: s_and_b32 s5, s5, s10
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s4, s5
|
; GFX9-NEXT: s_cmp_lt_u32 s4, s5
|
||||||
; GFX9-NEXT: s_cselect_b32 s4, s4, s5
|
; GFX9-NEXT: s_cselect_b32 s4, s4, s5
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s9, s10
|
; GFX9-NEXT: s_cmp_lt_u32 s8, s9
|
||||||
; GFX9-NEXT: s_cselect_b32 s5, s9, s10
|
; GFX9-NEXT: s_cselect_b32 s5, s8, s9
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s5
|
; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s5
|
||||||
; GFX9-NEXT: s_lshr_b32 s5, s1, 16
|
; GFX9-NEXT: s_lshr_b32 s5, s1, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s9, s4, 16
|
; GFX9-NEXT: s_lshr_b32 s8, s4, 16
|
||||||
; GFX9-NEXT: s_add_i32 s1, s1, s4
|
; GFX9-NEXT: s_add_i32 s1, s1, s4
|
||||||
; GFX9-NEXT: s_add_i32 s5, s5, s9
|
; GFX9-NEXT: s_add_i32 s5, s5, s8
|
||||||
; GFX9-NEXT: s_xor_b32 s4, s2, s8
|
; GFX9-NEXT: s_xor_b32 s4, s2, -1
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
|
; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
|
||||||
; GFX9-NEXT: s_lshr_b32 s5, s4, 16
|
; GFX9-NEXT: s_lshr_b32 s5, s4, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s9, s6, 16
|
; GFX9-NEXT: s_lshr_b32 s8, s6, 16
|
||||||
; GFX9-NEXT: s_and_b32 s4, s4, s11
|
; GFX9-NEXT: s_and_b32 s4, s4, s10
|
||||||
; GFX9-NEXT: s_and_b32 s6, s6, s11
|
; GFX9-NEXT: s_and_b32 s6, s6, s10
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s4, s6
|
; GFX9-NEXT: s_cmp_lt_u32 s4, s6
|
||||||
; GFX9-NEXT: s_cselect_b32 s4, s4, s6
|
; GFX9-NEXT: s_cselect_b32 s4, s4, s6
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s5, s9
|
; GFX9-NEXT: s_cmp_lt_u32 s5, s8
|
||||||
; GFX9-NEXT: s_cselect_b32 s5, s5, s9
|
; GFX9-NEXT: s_cselect_b32 s5, s5, s8
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s5
|
; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s5
|
||||||
; GFX9-NEXT: s_lshr_b32 s5, s2, 16
|
; GFX9-NEXT: s_lshr_b32 s5, s2, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s6, s4, 16
|
; GFX9-NEXT: s_lshr_b32 s6, s4, 16
|
||||||
; GFX9-NEXT: s_add_i32 s2, s2, s4
|
; GFX9-NEXT: s_add_i32 s2, s2, s4
|
||||||
; GFX9-NEXT: s_add_i32 s5, s5, s6
|
; GFX9-NEXT: s_add_i32 s5, s5, s6
|
||||||
; GFX9-NEXT: s_xor_b32 s4, s3, s8
|
; GFX9-NEXT: s_xor_b32 s4, s3, -1
|
||||||
; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s5
|
; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s5
|
||||||
; GFX9-NEXT: s_lshr_b32 s5, s4, 16
|
; GFX9-NEXT: s_lshr_b32 s5, s4, 16
|
||||||
; GFX9-NEXT: s_lshr_b32 s6, s7, 16
|
; GFX9-NEXT: s_lshr_b32 s6, s7, 16
|
||||||
; GFX9-NEXT: s_and_b32 s4, s4, s11
|
; GFX9-NEXT: s_and_b32 s4, s4, s10
|
||||||
; GFX9-NEXT: s_and_b32 s7, s7, s11
|
; GFX9-NEXT: s_and_b32 s7, s7, s10
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s4, s7
|
; GFX9-NEXT: s_cmp_lt_u32 s4, s7
|
||||||
; GFX9-NEXT: s_cselect_b32 s4, s4, s7
|
; GFX9-NEXT: s_cselect_b32 s4, s4, s7
|
||||||
; GFX9-NEXT: s_cmp_lt_u32 s5, s6
|
; GFX9-NEXT: s_cmp_lt_u32 s5, s6
|
||||||
|
@ -3585,63 +3566,62 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
|
||||||
;
|
;
|
||||||
; GFX10-LABEL: s_uaddsat_v8i16:
|
; GFX10-LABEL: s_uaddsat_v8i16:
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s8, -1, -1
|
; GFX10-NEXT: s_xor_b32 s8, s0, -1
|
||||||
; GFX10-NEXT: s_mov_b32 s10, 0xffff
|
; GFX10-NEXT: s_mov_b32 s9, 0xffff
|
||||||
; GFX10-NEXT: s_xor_b32 s9, s0, s8
|
; GFX10-NEXT: s_lshr_b32 s10, s8, 16
|
||||||
; GFX10-NEXT: s_and_b32 s12, s4, s10
|
; GFX10-NEXT: s_and_b32 s11, s4, s9
|
||||||
; GFX10-NEXT: s_lshr_b32 s11, s9, 16
|
; GFX10-NEXT: s_and_b32 s8, s8, s9
|
||||||
; GFX10-NEXT: s_and_b32 s9, s9, s10
|
|
||||||
; GFX10-NEXT: s_lshr_b32 s4, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s4, s4, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s9, s12
|
; GFX10-NEXT: s_cmp_lt_u32 s8, s11
|
||||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||||
; GFX10-NEXT: s_cselect_b32 s9, s9, s12
|
; GFX10-NEXT: s_cselect_b32 s8, s8, s11
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s11, s4
|
; GFX10-NEXT: s_cmp_lt_u32 s10, s4
|
||||||
; GFX10-NEXT: s_cselect_b32 s4, s11, s4
|
; GFX10-NEXT: s_cselect_b32 s4, s10, s4
|
||||||
; GFX10-NEXT: s_and_b32 s12, s5, s10
|
; GFX10-NEXT: s_and_b32 s11, s5, s9
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s9, s4
|
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s8, s4
|
||||||
; GFX10-NEXT: s_lshr_b32 s9, s0, 16
|
; GFX10-NEXT: s_lshr_b32 s8, s0, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s11, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s10, s4, 16
|
||||||
; GFX10-NEXT: s_add_i32 s0, s0, s4
|
; GFX10-NEXT: s_add_i32 s0, s0, s4
|
||||||
; GFX10-NEXT: s_xor_b32 s4, s1, s8
|
; GFX10-NEXT: s_xor_b32 s4, s1, -1
|
||||||
; GFX10-NEXT: s_add_i32 s9, s9, s11
|
; GFX10-NEXT: s_add_i32 s8, s8, s10
|
||||||
; GFX10-NEXT: s_lshr_b32 s11, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s10, s4, 16
|
||||||
; GFX10-NEXT: s_and_b32 s4, s4, s10
|
; GFX10-NEXT: s_and_b32 s4, s4, s9
|
||||||
; GFX10-NEXT: s_lshr_b32 s5, s5, 16
|
; GFX10-NEXT: s_lshr_b32 s5, s5, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s4, s12
|
; GFX10-NEXT: s_cmp_lt_u32 s4, s11
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s9
|
; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8
|
||||||
; GFX10-NEXT: s_cselect_b32 s4, s4, s12
|
; GFX10-NEXT: s_cselect_b32 s4, s4, s11
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s11, s5
|
; GFX10-NEXT: s_cmp_lt_u32 s10, s5
|
||||||
; GFX10-NEXT: s_cselect_b32 s5, s11, s5
|
; GFX10-NEXT: s_cselect_b32 s5, s10, s5
|
||||||
; GFX10-NEXT: s_and_b32 s12, s6, s10
|
; GFX10-NEXT: s_and_b32 s11, s6, s9
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s5
|
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s5
|
||||||
; GFX10-NEXT: s_lshr_b32 s5, s1, 16
|
; GFX10-NEXT: s_lshr_b32 s5, s1, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s11, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s10, s4, 16
|
||||||
; GFX10-NEXT: s_add_i32 s1, s1, s4
|
; GFX10-NEXT: s_add_i32 s1, s1, s4
|
||||||
; GFX10-NEXT: s_xor_b32 s4, s2, s8
|
; GFX10-NEXT: s_xor_b32 s4, s2, -1
|
||||||
; GFX10-NEXT: s_add_i32 s5, s5, s11
|
; GFX10-NEXT: s_add_i32 s5, s5, s10
|
||||||
; GFX10-NEXT: s_lshr_b32 s11, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s10, s4, 16
|
||||||
; GFX10-NEXT: s_and_b32 s4, s4, s10
|
; GFX10-NEXT: s_and_b32 s4, s4, s9
|
||||||
; GFX10-NEXT: s_lshr_b32 s6, s6, 16
|
; GFX10-NEXT: s_lshr_b32 s6, s6, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s4, s12
|
; GFX10-NEXT: s_cmp_lt_u32 s4, s11
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
|
; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5
|
||||||
; GFX10-NEXT: s_cselect_b32 s4, s4, s12
|
; GFX10-NEXT: s_cselect_b32 s4, s4, s11
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s11, s6
|
; GFX10-NEXT: s_cmp_lt_u32 s10, s6
|
||||||
; GFX10-NEXT: s_cselect_b32 s6, s11, s6
|
; GFX10-NEXT: s_cselect_b32 s6, s10, s6
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s6
|
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s6
|
||||||
; GFX10-NEXT: s_lshr_b32 s6, s2, 16
|
; GFX10-NEXT: s_lshr_b32 s6, s2, 16
|
||||||
; GFX10-NEXT: s_lshr_b32 s11, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s10, s4, 16
|
||||||
; GFX10-NEXT: s_add_i32 s2, s2, s4
|
; GFX10-NEXT: s_add_i32 s2, s2, s4
|
||||||
; GFX10-NEXT: s_xor_b32 s4, s3, s8
|
; GFX10-NEXT: s_xor_b32 s4, s3, -1
|
||||||
; GFX10-NEXT: s_add_i32 s6, s6, s11
|
; GFX10-NEXT: s_add_i32 s6, s6, s10
|
||||||
; GFX10-NEXT: s_lshr_b32 s8, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s10, s4, 16
|
||||||
; GFX10-NEXT: s_and_b32 s4, s4, s10
|
; GFX10-NEXT: s_and_b32 s4, s4, s9
|
||||||
; GFX10-NEXT: s_and_b32 s10, s7, s10
|
; GFX10-NEXT: s_and_b32 s9, s7, s9
|
||||||
; GFX10-NEXT: s_lshr_b32 s7, s7, 16
|
; GFX10-NEXT: s_lshr_b32 s7, s7, 16
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s4, s10
|
; GFX10-NEXT: s_cmp_lt_u32 s4, s9
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6
|
; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6
|
||||||
; GFX10-NEXT: s_cselect_b32 s4, s4, s10
|
; GFX10-NEXT: s_cselect_b32 s4, s4, s9
|
||||||
; GFX10-NEXT: s_cmp_lt_u32 s8, s7
|
; GFX10-NEXT: s_cmp_lt_u32 s10, s7
|
||||||
; GFX10-NEXT: s_cselect_b32 s7, s8, s7
|
; GFX10-NEXT: s_cselect_b32 s7, s10, s7
|
||||||
; GFX10-NEXT: s_lshr_b32 s5, s3, 16
|
; GFX10-NEXT: s_lshr_b32 s5, s3, 16
|
||||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s7
|
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s7
|
||||||
; GFX10-NEXT: s_lshr_b32 s7, s4, 16
|
; GFX10-NEXT: s_lshr_b32 s7, s4, 16
|
||||||
|
|
|
@ -50,16 +50,14 @@ define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> in
|
||||||
;
|
;
|
||||||
; GFX900-LABEL: scalar_xnor_v2i16_one_use:
|
; GFX900-LABEL: scalar_xnor_v2i16_one_use:
|
||||||
; GFX900: ; %bb.0: ; %entry
|
; GFX900: ; %bb.0: ; %entry
|
||||||
; GFX900-NEXT: s_pack_ll_b32_b16 s2, -1, -1
|
|
||||||
; GFX900-NEXT: s_xor_b32 s0, s0, s1
|
; GFX900-NEXT: s_xor_b32 s0, s0, s1
|
||||||
; GFX900-NEXT: s_xor_b32 s0, s0, s2
|
; GFX900-NEXT: s_xor_b32 s0, s0, -1
|
||||||
; GFX900-NEXT: ; return to shader part epilog
|
; GFX900-NEXT: ; return to shader part epilog
|
||||||
;
|
;
|
||||||
; GFX906-LABEL: scalar_xnor_v2i16_one_use:
|
; GFX906-LABEL: scalar_xnor_v2i16_one_use:
|
||||||
; GFX906: ; %bb.0: ; %entry
|
; GFX906: ; %bb.0: ; %entry
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s2, -1, -1
|
|
||||||
; GFX906-NEXT: s_xor_b32 s0, s0, s1
|
; GFX906-NEXT: s_xor_b32 s0, s0, s1
|
||||||
; GFX906-NEXT: s_xor_b32 s0, s0, s2
|
; GFX906-NEXT: s_xor_b32 s0, s0, -1
|
||||||
; GFX906-NEXT: ; return to shader part epilog
|
; GFX906-NEXT: ; return to shader part epilog
|
||||||
entry:
|
entry:
|
||||||
%xor = xor <2 x i16> %a, %b
|
%xor = xor <2 x i16> %a, %b
|
||||||
|
@ -150,7 +148,7 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
|
||||||
;
|
;
|
||||||
; GFX900-LABEL: scalar_xnor_v4i16_one_use:
|
; GFX900-LABEL: scalar_xnor_v4i16_one_use:
|
||||||
; GFX900: ; %bb.0:
|
; GFX900: ; %bb.0:
|
||||||
; GFX900-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX900-NEXT: s_mov_b32 s4, -1
|
||||||
; GFX900-NEXT: s_mov_b32 s5, s4
|
; GFX900-NEXT: s_mov_b32 s5, s4
|
||||||
; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||||
; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
||||||
|
@ -158,7 +156,7 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
|
||||||
;
|
;
|
||||||
; GFX906-LABEL: scalar_xnor_v4i16_one_use:
|
; GFX906-LABEL: scalar_xnor_v4i16_one_use:
|
||||||
; GFX906: ; %bb.0:
|
; GFX906: ; %bb.0:
|
||||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, -1, -1
|
; GFX906-NEXT: s_mov_b32 s4, -1
|
||||||
; GFX906-NEXT: s_mov_b32 s5, s4
|
; GFX906-NEXT: s_mov_b32 s5, s4
|
||||||
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||||
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
||||||
|
|
Loading…
Reference in New Issue