forked from OSchip/llvm-project
AMDGPU/GlobalISel: Improve 16-bit bswap
Match the new DAG behavior and use v_perm_b32 when available. Also improve codegen on SI/CI by expanding 16-bit swaps, and fix the non-power-of-2 cases.
This commit is contained in:
parent
3bb0ff8341
commit
60fea2713d
|
@ -598,12 +598,21 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
.widenScalarToNextPow2(0, 32)
|
||||
.widenScalarToNextPow2(1, 32);
|
||||
|
||||
getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE})
|
||||
getActionDefinitionsBuilder(G_BITREVERSE)
|
||||
.legalFor({S32})
|
||||
.clampScalar(0, S32, S32)
|
||||
.scalarize(0);
|
||||
|
||||
if (ST.has16BitInsts()) {
|
||||
getActionDefinitionsBuilder(G_BSWAP)
|
||||
.legalFor({S16, S32, V2S16})
|
||||
.clampMaxNumElements(0, S16, 2)
|
||||
// FIXME: Fixing non-power-of-2 before clamp is workaround for
|
||||
// narrowScalar limitation.
|
||||
.widenScalarToNextPow2(0)
|
||||
.clampScalar(0, S16, S32)
|
||||
.scalarize(0);
|
||||
|
||||
if (ST.hasVOP3PInsts()) {
|
||||
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
|
||||
.legalFor({S32, S16, V2S16})
|
||||
|
@ -620,6 +629,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
.scalarize(0);
|
||||
}
|
||||
} else {
|
||||
// TODO: Should have same legality without v_perm_b32
|
||||
getActionDefinitionsBuilder(G_BSWAP)
|
||||
.legalFor({S32})
|
||||
.lowerIf(narrowerThan(0, 32))
|
||||
// FIXME: Fixing non-power-of-2 before clamp is workaround for
|
||||
// narrowScalar limitation.
|
||||
.widenScalarToNextPow2(0)
|
||||
.maxScalar(0, S32)
|
||||
.scalarize(0)
|
||||
.lower();
|
||||
|
||||
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
|
||||
.legalFor({S32})
|
||||
.clampScalar(0, S32, S32)
|
||||
|
|
|
@ -341,32 +341,26 @@ define <2 x i64> @v_bswap_v2i64(<2 x i64> %src) {
|
|||
define amdgpu_ps i16 @s_bswap_i16(i16 inreg %src) {
|
||||
; GFX7-LABEL: s_bswap_i16:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, s0, s0, 24
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0xff00ff
|
||||
; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX7-NEXT: s_lshl_b32 s1, s0, 8
|
||||
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
|
||||
; GFX7-NEXT: s_lshr_b32 s0, s0, 8
|
||||
; GFX7-NEXT: s_or_b32 s0, s0, s1
|
||||
; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX8-LABEL: s_bswap_i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8-NEXT: s_mov_b32 s0, 0x10203
|
||||
; GFX8-NEXT: s_mov_b32 s0, 0xc0c0001
|
||||
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff
|
||||
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX8-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_bswap_i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0x10203
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xc0c0001
|
||||
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
|
||||
; GFX9-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
%bswap = call i16 @llvm.bswap.i16(i16 %src)
|
||||
|
@ -380,27 +374,24 @@ define i16 @v_bswap_i16(i16 %src) {
|
|||
; GFX7-LABEL: v_bswap_i16:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
|
||||
; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_bswap_i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001
|
||||
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: v_bswap_i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001
|
||||
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%bswap = call i16 @llvm.bswap.i16(i16 %src)
|
||||
ret i16 %bswap
|
||||
|
@ -409,51 +400,34 @@ define i16 @v_bswap_i16(i16 %src) {
|
|||
define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) {
|
||||
; GFX7-LABEL: s_bswap_v2i16:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, s0, s0, 24
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0xff00ff
|
||||
; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, s1, s1, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v2, s1, s1, 24
|
||||
; GFX7-NEXT: v_bfi_b32 v1, s0, v2, v1
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: v_and_b32_e32 v1, s0, v1
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xffff
|
||||
; GFX7-NEXT: s_lshl_b32 s2, s0, 8
|
||||
; GFX7-NEXT: s_and_b32 s0, s0, s3
|
||||
; GFX7-NEXT: s_lshr_b32 s0, s0, 8
|
||||
; GFX7-NEXT: s_or_b32 s0, s0, s2
|
||||
; GFX7-NEXT: s_lshl_b32 s2, s1, 8
|
||||
; GFX7-NEXT: s_and_b32 s1, s1, s3
|
||||
; GFX7-NEXT: s_lshr_b32 s1, s1, 8
|
||||
; GFX7-NEXT: s_or_b32 s1, s1, s2
|
||||
; GFX7-NEXT: s_bfe_u32 s1, s1, 0x100000
|
||||
; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
|
||||
; GFX7-NEXT: s_or_b32 s0, s0, s1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX8-LABEL: s_bswap_v2i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8-NEXT: s_mov_b32 s0, 0x10203
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8-NEXT: s_mov_b32 s0, 0x2030001
|
||||
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX8-NEXT: v_perm_b32 v1, 0, v1, s0
|
||||
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX8-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_bswap_v2i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_lshr_b32 s1, s0, 16
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0x10203
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-NEXT: v_perm_b32 v1, 0, v1, s0
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0x2030001
|
||||
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s0
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
%bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src)
|
||||
|
@ -466,30 +440,25 @@ define i32 @v_bswap_i16_zext_to_i32(i16 %src) {
|
|||
; GFX7-LABEL: v_bswap_i16_zext_to_i32:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
|
||||
; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; GFX7-NEXT: v_bfe_u32 v0, v0, 0, 16
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_bswap_i16_zext_to_i32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001
|
||||
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff
|
||||
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: v_bswap_i16_zext_to_i32:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001
|
||||
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
|
||||
; GFX9-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%bswap = call i16 @llvm.bswap.i16(i16 %src)
|
||||
%zext = zext i16 %bswap to i32
|
||||
|
@ -500,29 +469,26 @@ define i32 @v_bswap_i16_sext_to_i32(i16 %src) {
|
|||
; GFX7-LABEL: v_bswap_i16_sext_to_i32:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
|
||||
; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_bswap_i16_sext_to_i32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001
|
||||
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 16
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: v_bswap_i16_sext_to_i32:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001
|
||||
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%bswap = call i16 @llvm.bswap.i16(i16 %src)
|
||||
|
@ -534,42 +500,29 @@ define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) {
|
|||
; GFX7-LABEL: v_bswap_v2i16:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_alignbit_b32 v2, v0, v0, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
|
||||
; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v2
|
||||
; GFX7-NEXT: v_alignbit_b32 v2, v1, v1, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, v1, v1, 24
|
||||
; GFX7-NEXT: v_bfi_b32 v1, s4, v1, v2
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0xffff
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v0
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
|
||||
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v1
|
||||
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||
; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_bswap_v2i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x2030001
|
||||
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4
|
||||
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: v_bswap_v2i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX9-NEXT: v_perm_b32 v1, 0, v1, s4
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0x2030001
|
||||
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src)
|
||||
ret <2 x i16> %bswap
|
||||
|
@ -581,6 +534,46 @@ define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) {
|
|||
; ret <3 x i16> %bswap
|
||||
; }
|
||||
|
||||
define i64 @v_bswap_i48(i64 %src) {
|
||||
; GFX7-LABEL: v_bswap_i48:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_alignbit_b32 v2, v1, v1, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v1, v1, v1, 24
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
|
||||
; GFX7-NEXT: v_bfi_b32 v1, s4, v1, v2
|
||||
; GFX7-NEXT: v_alignbit_b32 v2, v0, v0, 8
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
|
||||
; GFX7-NEXT: v_bfi_b32 v2, s4, v0, v2
|
||||
; GFX7-NEXT: v_lshr_b64 v[0:1], v[1:2], 16
|
||||
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_bswap_i48:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4
|
||||
; GFX8-NEXT: v_perm_b32 v2, 0, v0, s4
|
||||
; GFX8-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
|
||||
; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: v_bswap_i48:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0x10203
|
||||
; GFX9-NEXT: v_perm_b32 v1, 0, v1, s4
|
||||
; GFX9-NEXT: v_perm_b32 v2, 0, v0, s4
|
||||
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
|
||||
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%trunc = trunc i64 %src to i48
|
||||
%bswap = call i48 @llvm.bswap.i48(i48 %trunc)
|
||||
%zext = zext i48 %bswap to i64
|
||||
ret i64 %zext
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.readfirstlane(i32) #0
|
||||
declare i16 @llvm.bswap.i16(i16) #1
|
||||
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1
|
||||
|
@ -589,6 +582,7 @@ declare i32 @llvm.bswap.i32(i32) #1
|
|||
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) #1
|
||||
declare i64 @llvm.bswap.i64(i64) #1
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #1
|
||||
declare i48 @llvm.bswap.i48(i48) #1
|
||||
|
||||
attributes #0 = { convergent nounwind readnone }
|
||||
attributes #1 = { nounwind readnone speculatable willreturn }
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s
|
||||
|
||||
---
|
||||
name: bswap_s8
|
||||
|
@ -7,14 +8,32 @@ name: bswap_s8
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; CHECK-LABEL: name: bswap_s8
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: $vgpr0 = COPY [[COPY2]](s32)
|
||||
; GFX7-LABEL: name: bswap_s8
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32)
|
||||
; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
|
||||
; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
|
||||
; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND1]](s32)
|
||||
; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
|
||||
; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
|
||||
; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
|
||||
; GFX7: $vgpr0 = COPY [[COPY7]](s32)
|
||||
; GFX8-LABEL: name: bswap_s8
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]]
|
||||
; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
|
||||
; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[BSWAP]], [[C]](s16)
|
||||
; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
|
||||
; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s8) = G_TRUNC %0
|
||||
%2:_(s8) = G_BSWAP %1
|
||||
|
@ -28,14 +47,27 @@ name: bswap_s16
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; CHECK-LABEL: name: bswap_s16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: $vgpr0 = COPY [[COPY2]](s32)
|
||||
; GFX7-LABEL: name: bswap_s16
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
|
||||
; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
|
||||
; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
|
||||
; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
|
||||
; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
|
||||
; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
|
||||
; GFX7: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; GFX8-LABEL: name: bswap_s16
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]]
|
||||
; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BSWAP]](s16)
|
||||
; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s16) = G_TRUNC %0
|
||||
%2:_(s16) = G_BSWAP %1
|
||||
|
@ -49,14 +81,32 @@ name: bswap_s24
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; CHECK-LABEL: name: bswap_s24
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: $vgpr0 = COPY [[COPY2]](s32)
|
||||
; GFX7-LABEL: name: bswap_s24
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32)
|
||||
; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
|
||||
; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
|
||||
; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND1]](s32)
|
||||
; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
|
||||
; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
|
||||
; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
|
||||
; GFX7: $vgpr0 = COPY [[COPY7]](s32)
|
||||
; GFX8-LABEL: name: bswap_s24
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
|
||||
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
|
||||
; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX8: $vgpr0 = COPY [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s24) = G_TRUNC %0
|
||||
%2:_(s24) = G_BSWAP %1
|
||||
|
@ -70,10 +120,14 @@ name: bswap_s32
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; CHECK-LABEL: name: bswap_s32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
|
||||
; CHECK: $vgpr0 = COPY [[BSWAP]](s32)
|
||||
; GFX7-LABEL: name: bswap_s32
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
|
||||
; GFX7: $vgpr0 = COPY [[BSWAP]](s32)
|
||||
; GFX8-LABEL: name: bswap_s32
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
|
||||
; GFX8: $vgpr0 = COPY [[BSWAP]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = G_BSWAP %0
|
||||
$vgpr0 = COPY %1
|
||||
|
@ -85,45 +139,259 @@ name: bswap_v2s16
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; CHECK-LABEL: name: bswap_v2s16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
|
||||
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[COPY2]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[COPY3]]
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
|
||||
; GFX7-LABEL: name: bswap_v2s16
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
|
||||
; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
|
||||
; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32)
|
||||
; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
|
||||
; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
|
||||
; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
|
||||
; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
|
||||
; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
|
||||
; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
|
||||
; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
|
||||
; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
|
||||
; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32)
|
||||
; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
|
||||
; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]]
|
||||
; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
|
||||
; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
|
||||
; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
|
||||
; GFX7: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
|
||||
; GFX7: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
|
||||
; GFX7: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
|
||||
; GFX8-LABEL: name: bswap_v2s16
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[COPY]]
|
||||
; GFX8: $vgpr0 = COPY [[BSWAP]](<2 x s16>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = G_BSWAP %0
|
||||
$vgpr0 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: bswap_v3s16
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
; GFX7-LABEL: name: bswap_v3s16
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
|
||||
; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
|
||||
; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
|
||||
; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY4]](s32)
|
||||
; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
|
||||
; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
|
||||
; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
|
||||
; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32)
|
||||
; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
|
||||
; GFX7: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
|
||||
; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
|
||||
; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY8]](s32)
|
||||
; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
|
||||
; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]]
|
||||
; GFX7: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
|
||||
; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[COPY10]](s32)
|
||||
; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
|
||||
; GFX7: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
|
||||
; GFX7: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
|
||||
; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
|
||||
; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY12]](s32)
|
||||
; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
|
||||
; GFX7: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]]
|
||||
; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
|
||||
; GFX7: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
|
||||
; GFX7: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
|
||||
; GFX7: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; GFX7: $vgpr1 = COPY [[ANYEXT1]](s32)
|
||||
; GFX7: $vgpr2 = COPY [[ANYEXT2]](s32)
|
||||
; GFX8-LABEL: name: bswap_v3s16
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
|
||||
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
|
||||
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
|
||||
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
|
||||
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
|
||||
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
|
||||
; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
|
||||
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
|
||||
; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
|
||||
; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
|
||||
; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX8: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
|
||||
; GFX8: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[EXTRACT2]]
|
||||
; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
|
||||
; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[BSWAP]](<2 x s16>), 0
|
||||
; GFX8: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
|
||||
; GFX8: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
|
||||
; GFX8: [[BSWAP1:%[0-9]+]]:_(s16) = G_BSWAP [[COPY6]]
|
||||
; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT3]](<3 x s16>), 0
|
||||
; GFX8: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT3]], [[BSWAP1]](s16), 32
|
||||
; GFX8: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT4]](<4 x s16>), 0
|
||||
; GFX8: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT4]](<3 x s16>), 0
|
||||
; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>)
|
||||
; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
|
||||
; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
|
||||
; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
|
||||
; GFX8: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX8: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
|
||||
; GFX8: $vgpr0 = COPY [[COPY7]](s32)
|
||||
; GFX8: $vgpr1 = COPY [[COPY8]](s32)
|
||||
; GFX8: $vgpr2 = COPY [[COPY9]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s16) = G_TRUNC %0
|
||||
%4:_(s16) = G_TRUNC %1
|
||||
%5:_(s16) = G_TRUNC %2
|
||||
|
||||
%6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5
|
||||
%7:_(<3 x s16>) = G_BSWAP %6
|
||||
%8:_(s16), %9:_(s16), %10:_(s16) = G_UNMERGE_VALUES %7
|
||||
%11:_(s32) = G_ANYEXT %8
|
||||
%12:_(s32) = G_ANYEXT %9
|
||||
%13:_(s32) = G_ANYEXT %10
|
||||
$vgpr0 = COPY %11
|
||||
$vgpr1 = COPY %12
|
||||
$vgpr2 = COPY %13
|
||||
...
|
||||
|
||||
---
|
||||
# Legalizer test: G_BSWAP on a <4 x s16> value copied from $vgpr0_vgpr1.
# GFX7 checks: the vector is unmerged into two <2 x s16> halves and bitcast to
# s32; each 16-bit lane is then byte-swapped manually as
# (lane << 8) | ((lane & 65535) >> 8), truncated to s16, and the four results
# are repacked with G_ZEXT / G_SHL by 16 / G_OR / G_BITCAST before
# G_CONCAT_VECTORS.
# GFX8 checks: the vector is only split into two <2 x s16> halves; G_BSWAP is
# kept on each half and the halves are rejoined with G_CONCAT_VECTORS.
name: bswap_v4s16
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
; GFX7-LABEL: name: bswap_v4s16
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
|
||||
; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
|
||||
; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
|
||||
; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32)
|
||||
; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
|
||||
; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
|
||||
; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
|
||||
; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
|
||||
; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
|
||||
; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
|
||||
; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
|
||||
; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
|
||||
; GFX7: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32)
|
||||
; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
|
||||
; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]]
|
||||
; GFX7: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
|
||||
; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[COPY8]](s32)
|
||||
; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
|
||||
; GFX7: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
|
||||
; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]]
|
||||
; GFX7: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY10]](s32)
|
||||
; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
|
||||
; GFX7: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]]
|
||||
; GFX7: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
|
||||
; GFX7: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[COPY12]](s32)
|
||||
; GFX7: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
|
||||
; GFX7: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; GFX7: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
|
||||
; GFX7: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]]
|
||||
; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY14]](s32)
|
||||
; GFX7: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
|
||||
; GFX7: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]]
|
||||
; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
|
||||
; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
|
||||
; GFX7: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
|
||||
; GFX7: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
|
||||
; GFX7: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
|
||||
; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
|
||||
; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
|
||||
; GFX7: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
|
||||
; GFX7: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
|
||||
; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
|
||||
; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
|
||||
; GFX8-LABEL: name: bswap_v4s16
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV]]
|
||||
; GFX8: [[BSWAP1:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV1]]
|
||||
; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BSWAP]](<2 x s16>), [[BSWAP1]](<2 x s16>)
|
||||
; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = G_BSWAP %0
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: bswap_v2s32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
; CHECK-LABEL: name: bswap_v2s32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
|
||||
; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX7-LABEL: name: bswap_v2s32
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
|
||||
; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX8-LABEL: name: bswap_v2s32
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
|
||||
; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
|
||||
; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = G_BSWAP %0
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
@@ -135,14 +403,58 @@ name: bswap_s64
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
; CHECK-LABEL: name: bswap_s64
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
|
||||
; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
|
||||
; GFX7-LABEL: name: bswap_s64
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
|
||||
; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
|
||||
; GFX7: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[MV]](s64)
|
||||
; GFX8-LABEL: name: bswap_s64
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
|
||||
; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
|
||||
; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = G_BSWAP %0
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
# Legalizer test: G_BSWAP on a <2 x s64> value copied from
# $vgpr0_vgpr1_vgpr2_vgpr3.
# The GFX7 and GFX8 check sequences are the same: the vector is unmerged into
# two s64 elements, each element is unmerged into two s32 pieces, G_BSWAP is
# applied to each s32 piece, and the pieces are merged back in the opposite
# order (the swapped second piece becomes the first G_MERGE_VALUES operand).
# G_BUILD_VECTOR then reassembles the <2 x s64> result.
name: bswap_v2s64
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX7-LABEL: name: bswap_v2s64
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
|
||||
; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]]
|
||||
; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]]
|
||||
; GFX7: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
|
||||
; GFX7: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]]
|
||||
; GFX7: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]]
|
||||
; GFX7: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32)
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
|
||||
; GFX8-LABEL: name: bswap_v2s64
|
||||
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
|
||||
; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]]
|
||||
; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]]
|
||||
; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
|
||||
; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
|
||||
; GFX8: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]]
|
||||
; GFX8: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]]
|
||||
; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32)
|
||||
; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
|
||||
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
|
||||
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<2 x s64>) = G_BSWAP %0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue