GlobalISel: Add and_trivial_mask to all_combines

Also introduce a new category of combines, known_bits_simplifications.
Matt Arsenault 2020-08-27 13:15:46 -04:00
parent 4ef9275b9b
commit 201f770f16
14 changed files with 1811 additions and 1994 deletions
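
The rule folds a G_AND with a constant mask when known-bits analysis shows the mask keeps every bit that can possibly be set in the other operand, making the AND a no-op. A minimal generic-MIR sketch of the pattern, mirroring the post-legalizer combiner test added by this commit (register names are illustrative):

  %ptr:_(p1) = COPY $vgpr0_vgpr1
  %load:_(s32) = G_ZEXTLOAD %ptr :: (load 1, addrspace 1) ; upper 24 bits known zero
  %mask:_(s32) = G_CONSTANT i32 255
  %and:_(s32) = G_AND %load, %mask                        ; mask covers every possibly-set bit
  ; the combiner replaces uses of %and with %load, leaving the G_AND and G_CONSTANT dead

Most of the AMDGPU test churn below comes from redundant s_and_b32/v_and_b32 masks with 0xffff or 0xff disappearing after 16-bit shifts and byte extracts.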


@ -357,6 +357,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                          binop_right_to_zero, p2i_to_i2p,
                                          i2p_to_p2i]>;
+def known_bits_simplifications : GICombineGroup<[and_trivial_mask]>;
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
@ -367,4 +369,5 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
                                     identity_combines, simplify_add_to_sub,
                                     hoist_logic_op_with_same_opcode_hands,
                                     shl_ashr_to_sext_inreg, sext_inreg_of_load,
-                                    width_reduction_combines, select_combines]>;
+                                    width_reduction_combines, select_combines,
+                                    known_bits_simplifications]>;
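
With known_bits_simplifications folded into all_combines, any target combiner built from the generic all_combines group picks the new rule up without further changes; the AMDGPU post-legalizer combiner exercised by the tests in this commit is one such consumer. A hedged TableGen sketch of a consumer definition follows; the helper name is made up for illustration and is not part of this commit:

// Hypothetical target combiner definition; all_combines now carries
// known_bits_simplifications, so no per-target change is needed to enable the rule.
def MyPostLegalizerCombinerHelper : GICombinerHelper<
  "MyGenPostLegalizerCombinerHelper", [all_combines]>;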


@ -159,11 +159,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
;
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s3, 0xffff
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s1, 0xffc0
; GFX8-NEXT: s_and_b32 s0, s0, s3
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_add_i32 s2, s2, s1
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
@ -186,10 +185,9 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
;
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_mov_b32 s2, 0xffff
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, s2
; GFX8-NEXT: s_and_b32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
; GFX8-NEXT: s_add_i32 s1, s1, 4
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
@ -212,10 +210,9 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
;
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_mov_b32 s2, 0xffff
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, s2
; GFX8-NEXT: s_and_b32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, 4
; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
@ -239,13 +236,11 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
;
; GFX8-LABEL: s_add_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s3, 0xffff
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s3
; GFX8-NEXT: s_and_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_and_b32 s4, s4, s3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_add_i32 s2, s2, s4
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
@ -271,13 +266,11 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
; GFX8-LABEL: s_add_v2i16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_xor_b32 s0, s0, 0x80008000
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s3, 0xffff
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s3
; GFX8-NEXT: s_and_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_and_b32 s4, s4, s3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_add_i32 s2, s2, s4
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
@ -305,13 +298,11 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
; GFX8-LABEL: s_add_v2i16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_xor_b32 s1, s1, 0x80008000
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s3, 0xffff
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s3
; GFX8-NEXT: s_and_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_and_b32 s4, s4, s3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_add_i32 s2, s2, s4
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
@ -343,13 +334,11 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
; GFX8-NEXT: s_mov_b32 s2, 0x80008000
; GFX8-NEXT: s_xor_b32 s1, s1, s2
; GFX8-NEXT: s_xor_b32 s0, s0, s2
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s3, 0xffff
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s3
; GFX8-NEXT: s_and_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_and_b32 s4, s4, s3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_add_i32 s2, s2, s4
; GFX8-NEXT: s_lshl_b32 s1, s2, 16


@ -47,8 +47,8 @@ define i8 @v_ashr_i8_7(i8 %value) {
; GFX9-LABEL: v_ashr_i8_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 7
; GFX9-NEXT: v_ashrrev_i16_sdwa v0, s4, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_mov_b32_e32 v1, 7
; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%result = ashr i8 %value, 7
ret i8 %result


@ -514,7 +514,6 @@ define i64 @v_bswap_i48(i64 %src) {
; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
; GFX7-NEXT: v_bfi_b32 v2, s4, v0, v2
; GFX7-NEXT: v_lshr_b64 v[0:1], v[1:2], 16
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bswap_i48:
@ -524,7 +523,6 @@ define i64 @v_bswap_i48(i64 %src) {
; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4
; GFX8-NEXT: v_perm_b32 v2, 0, v0, s4
; GFX8-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_bswap_i48:
@ -534,7 +532,6 @@ define i64 @v_bswap_i48(i64 %src) {
; GFX9-NEXT: v_perm_b32 v1, 0, v1, s4
; GFX9-NEXT: v_perm_b32 v2, 0, v0, s4
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%trunc = trunc i64 %src to i48
%bswap = call i48 @llvm.bswap.i48(i48 %trunc)


@ -224,30 +224,28 @@ define <4 x float> @v_uitofp_v4i8_to_v4f32(i32 %arg0) nounwind {
; SI-LABEL: v_uitofp_v4i8_to_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; SI-NEXT: s_movk_i32 s4, 0xff
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; SI-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; SI-NEXT: v_and_b32_e32 v0, s4, v0
; SI-NEXT: v_and_b32_e32 v3, s4, v0
; SI-NEXT: v_and_b32_e32 v1, s4, v1
; SI-NEXT: v_and_b32_e32 v2, s4, v2
; SI-NEXT: v_and_b32_e32 v3, s4, v3
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v3
; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v0
; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2
; SI-NEXT: v_cvt_f32_ubyte0_e32 v3, v3
; SI-NEXT: v_mov_b32_e32 v0, v4
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_uitofp_v4i8_to_v4f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_movk_i32 s4, 0xff
; VI-NEXT: v_mov_b32_e32 v3, s4
; VI-NEXT: v_mov_b32_e32 v2, s4
; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; VI-NEXT: v_and_b32_sdwa v2, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; VI-NEXT: v_and_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
; VI-NEXT: v_cvt_f32_ubyte0_e32 v3, v0
; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v0
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2
; VI-NEXT: v_mov_b32_e32 v0, v4

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -46,8 +46,8 @@ define i8 @v_lshr_i8_7(i8 %value) {
; GFX9-LABEL: v_lshr_i8_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 7
; GFX9-NEXT: v_lshrrev_b16_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_mov_b32_e32 v1, 7
; GFX9-NEXT: v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%result = lshr i8 %value, 7
ret i8 %result
@ -557,13 +557,11 @@ define amdgpu_ps i32 @s_lshr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
;
; GFX8-LABEL: s_lshr_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s3, 0xffff
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s3
; GFX8-NEXT: s_and_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_and_b32 s4, s4, s3
; GFX8-NEXT: s_lshr_b32 s0, s0, s1
; GFX8-NEXT: s_lshr_b32 s1, s2, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
@ -740,21 +738,17 @@ define amdgpu_ps <2 x i32> @s_lshr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
;
; GFX8-LABEL: s_lshr_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_mov_b32 s6, 0xffff
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_lshr_b32 s7, s2, 16
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_lshr_b32 s8, s3, 16
; GFX8-NEXT: s_and_b32 s0, s0, s6
; GFX8-NEXT: s_and_b32 s2, s2, s6
; GFX8-NEXT: s_and_b32 s4, s4, s6
; GFX8-NEXT: s_and_b32 s7, s7, s6
; GFX8-NEXT: s_lshr_b32 s0, s0, s2
; GFX8-NEXT: s_lshr_b32 s2, s4, s7
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_lshr_b32 s8, s3, 16
; GFX8-NEXT: s_and_b32 s1, s1, s6
; GFX8-NEXT: s_and_b32 s3, s3, s6
; GFX8-NEXT: s_and_b32 s5, s5, s6
; GFX8-NEXT: s_and_b32 s8, s8, s6
; GFX8-NEXT: s_lshr_b32 s1, s1, s3
; GFX8-NEXT: s_lshr_b32 s3, s5, s8
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
@ -932,39 +926,31 @@ define amdgpu_ps <4 x i32> @s_lshr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
;
; GFX8-LABEL: s_lshr_v8i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_mov_b32 s12, 0xffff
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_lshr_b32 s13, s4, 16
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_lshr_b32 s14, s5, 16
; GFX8-NEXT: s_and_b32 s0, s0, s12
; GFX8-NEXT: s_and_b32 s4, s4, s12
; GFX8-NEXT: s_and_b32 s8, s8, s12
; GFX8-NEXT: s_and_b32 s13, s13, s12
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_lshr_b32 s15, s6, 16
; GFX8-NEXT: s_lshr_b32 s0, s0, s4
; GFX8-NEXT: s_lshr_b32 s4, s8, s13
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_lshr_b32 s14, s5, 16
; GFX8-NEXT: s_and_b32 s1, s1, s12
; GFX8-NEXT: s_and_b32 s5, s5, s12
; GFX8-NEXT: s_and_b32 s9, s9, s12
; GFX8-NEXT: s_and_b32 s14, s14, s12
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_lshr_b32 s16, s7, 16
; GFX8-NEXT: s_lshr_b32 s1, s1, s5
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_lshr_b32 s15, s6, 16
; GFX8-NEXT: s_and_b32 s2, s2, s12
; GFX8-NEXT: s_and_b32 s6, s6, s12
; GFX8-NEXT: s_and_b32 s10, s10, s12
; GFX8-NEXT: s_and_b32 s15, s15, s12
; GFX8-NEXT: s_lshr_b32 s5, s9, s14
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_and_b32 s0, s0, s12
; GFX8-NEXT: s_lshr_b32 s2, s2, s6
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_lshr_b32 s16, s7, 16
; GFX8-NEXT: s_or_b32 s0, s4, s0
; GFX8-NEXT: s_and_b32 s3, s3, s12
; GFX8-NEXT: s_and_b32 s7, s7, s12
; GFX8-NEXT: s_and_b32 s11, s11, s12
; GFX8-NEXT: s_and_b32 s16, s16, s12
; GFX8-NEXT: s_lshr_b32 s6, s10, s15
; GFX8-NEXT: s_lshl_b32 s4, s5, 16
; GFX8-NEXT: s_and_b32 s1, s1, s12


@ -0,0 +1,22 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
---
name: remove_and_255_zextload
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: remove_and_255_zextload
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: %ptr:_(p1) = COPY $vgpr0_vgpr1
; CHECK: %load:_(s32) = G_ZEXTLOAD %ptr(p1) :: (load 1, addrspace 1)
; CHECK: $vgpr0 = COPY %load(s32)
%ptr:_(p1) = COPY $vgpr0_vgpr1
%load:_(s32) = G_ZEXTLOAD %ptr :: (load 1, addrspace 1, align 1)
%mask:_(s32) = G_CONSTANT i32 255
%and:_(s32) = G_AND %load, %mask
$vgpr0 = COPY %and
...
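
For contrast, a hypothetical input where the rule does not fire because the mask clears bits that may be set in the loaded value; this block is illustrative only and is not part of the committed test:

  %ptr:_(p1) = COPY $vgpr0_vgpr1
  %load:_(s32) = G_ZEXTLOAD %ptr :: (load 2, addrspace 1) ; low 16 bits unknown
  %mask:_(s32) = G_CONSTANT i32 255
  %and:_(s32) = G_AND %load, %mask                        ; would drop bits 8-15, so the G_AND stays
  $vgpr0 = COPY %and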


@ -461,7 +461,6 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
; GFX7-NEXT: s_and_b32 s0, s0, 0x3fff3fff
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
; GFX7-NEXT: s_and_b32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s1, s1, s2
; GFX7-NEXT: s_lshl_b32 s0, s0, 2
; GFX7-NEXT: s_lshl_b32 s1, s1, 2
; GFX7-NEXT: ; return to shader part epilog
@ -473,7 +472,6 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_mov_b32 s3, s2
; GFX8-NEXT: s_and_b32 s0, s0, s4
; GFX8-NEXT: s_and_b32 s1, s1, s4
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_mov_b32 s5, s4
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
@ -485,9 +483,7 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s0, 0x3fff3fff
; GFX9-NEXT: s_lshr_b32 s1, s0, 16
; GFX9-NEXT: s_mov_b32 s2, 0xffff
; GFX9-NEXT: s_and_b32 s0, s0, s2
; GFX9-NEXT: s_and_b32 s1, s1, s2
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: s_lshl_b32 s1, s1, 2
; GFX9-NEXT: ; return to shader part epilog
@ -506,14 +502,13 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
; GFX7-LABEL: v_shl_v2i32_zext_v2i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_and_b32_e32 v0, v0, v2
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX7-NEXT: s_mov_b32 s4, 0xffff
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_and_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
@ -521,24 +516,19 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
; GFX8-LABEL: v_shl_v2i32_zext_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0xffff
; GFX8-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 2
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_v2i32_zext_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
; GFX9-NEXT: s_mov_b32 s5, 0xffff
; GFX9-NEXT: v_and_b32_sdwa v1, v0, s5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: s_mov_b32 s4, 2
; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; GFX9-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%and = and <2 x i16> %x, <i16 16383, i16 16383>
%ext = zext <2 x i16> %and to <2 x i32>


@ -551,13 +551,11 @@ define amdgpu_ps i32 @s_shl_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amoun
;
; GFX8-LABEL: s_shl_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_mov_b32 s3, 0xffff
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s3
; GFX8-NEXT: s_and_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_and_b32 s4, s4, s3
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
; GFX8-NEXT: s_lshl_b32 s1, s2, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
@ -722,21 +720,17 @@ define amdgpu_ps <2 x i32> @s_shl_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
;
; GFX8-LABEL: s_shl_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_mov_b32 s6, 0xffff
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_lshr_b32 s7, s2, 16
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_lshr_b32 s8, s3, 16
; GFX8-NEXT: s_and_b32 s0, s0, s6
; GFX8-NEXT: s_and_b32 s2, s2, s6
; GFX8-NEXT: s_and_b32 s4, s4, s6
; GFX8-NEXT: s_and_b32 s7, s7, s6
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
; GFX8-NEXT: s_lshl_b32 s2, s4, s7
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_lshr_b32 s8, s3, 16
; GFX8-NEXT: s_and_b32 s1, s1, s6
; GFX8-NEXT: s_and_b32 s3, s3, s6
; GFX8-NEXT: s_and_b32 s5, s5, s6
; GFX8-NEXT: s_and_b32 s8, s8, s6
; GFX8-NEXT: s_lshl_b32 s1, s1, s3
; GFX8-NEXT: s_lshl_b32 s3, s5, s8
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
@ -898,39 +892,31 @@ define amdgpu_ps <4 x i32> @s_shl_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
;
; GFX8-LABEL: s_shl_v8i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_mov_b32 s12, 0xffff
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_lshr_b32 s13, s4, 16
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_lshr_b32 s14, s5, 16
; GFX8-NEXT: s_and_b32 s0, s0, s12
; GFX8-NEXT: s_and_b32 s4, s4, s12
; GFX8-NEXT: s_and_b32 s8, s8, s12
; GFX8-NEXT: s_and_b32 s13, s13, s12
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_lshr_b32 s15, s6, 16
; GFX8-NEXT: s_lshl_b32 s0, s0, s4
; GFX8-NEXT: s_lshl_b32 s4, s8, s13
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_lshr_b32 s14, s5, 16
; GFX8-NEXT: s_and_b32 s1, s1, s12
; GFX8-NEXT: s_and_b32 s5, s5, s12
; GFX8-NEXT: s_and_b32 s9, s9, s12
; GFX8-NEXT: s_and_b32 s14, s14, s12
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_lshr_b32 s16, s7, 16
; GFX8-NEXT: s_lshl_b32 s1, s1, s5
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_lshr_b32 s15, s6, 16
; GFX8-NEXT: s_and_b32 s2, s2, s12
; GFX8-NEXT: s_and_b32 s6, s6, s12
; GFX8-NEXT: s_and_b32 s10, s10, s12
; GFX8-NEXT: s_and_b32 s15, s15, s12
; GFX8-NEXT: s_lshl_b32 s5, s9, s14
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_and_b32 s0, s0, s12
; GFX8-NEXT: s_lshl_b32 s2, s2, s6
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_lshr_b32 s16, s7, 16
; GFX8-NEXT: s_or_b32 s0, s4, s0
; GFX8-NEXT: s_and_b32 s3, s3, s12
; GFX8-NEXT: s_and_b32 s7, s7, s12
; GFX8-NEXT: s_and_b32 s11, s11, s12
; GFX8-NEXT: s_and_b32 s16, s16, s12
; GFX8-NEXT: s_lshl_b32 s6, s10, s15
; GFX8-NEXT: s_lshl_b32 s4, s5, 16
; GFX8-NEXT: s_and_b32 s1, s1, s12


@ -205,10 +205,7 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_min_u32_e32 v2, v3, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_mov_b32_e32 v2, 0xff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_and_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v0, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
@ -223,10 +220,9 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_add_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT: v_add_u16_e64 v1, v3, v2 clamp
; GFX8-NEXT: v_mov_b32_e32 v2, 0xff
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i8:
@ -291,10 +287,7 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_cmp_lt_u32 s3, s2
; GFX6-NEXT: s_cselect_b32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_movk_i32 s2, 0xff
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@ -311,11 +304,10 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: s_lshl_b32 s0, s2, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v2, 0xff
; GFX8-NEXT: v_add_u16_e64 v1, s0, v1 clamp
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
@ -399,24 +391,19 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_min_u32_e32 v3, v5, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: s_movk_i32 s4, 0xff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_min_u32_e32 v4, v5, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 24, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@ -565,17 +552,12 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_cmp_lt_u32 s5, s4
; GFX6-NEXT: s_cselect_b32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s3, s3, s4
; GFX6-NEXT: s_movk_i32 s4, 0xff
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s3, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s1, s3, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
@ -1892,10 +1874,7 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-NEXT: s_cmp_lt_u32 s3, s2
; GFX6-NEXT: s_cselect_b32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_mov_b32 s2, 0xffff
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@ -1946,10 +1925,7 @@ define amdgpu_ps float @uaddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_min_u32_e32 v1, s1, v1
; GFX6-NEXT: v_add_i32_e32 v1, vcc, s0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s0, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v0, s0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
@ -1994,10 +1970,7 @@ define amdgpu_ps float @uaddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX6-NEXT: v_min_u32_e32 v2, s0, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s0, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v0, s0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
@ -2063,19 +2036,14 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v3
; GFX6-NEXT: v_min_u32_e32 v4, v5, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v4i16:
@ -2142,16 +2110,11 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s5, s4
; GFX6-NEXT: s_cselect_b32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s3, s3, s4
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s4
; GFX6-NEXT: s_and_b32 s2, s3, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_uaddsat_v4i16:
@ -2241,29 +2204,22 @@ define <3 x float> @v_uaddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
; GFX6-NEXT: v_min_u32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11
; GFX6-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_u32_e32 v6, v7, v6
; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_and_b32_e32 v3, s4, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v6i16:
@ -2351,20 +2307,13 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s7, s6
; GFX6-NEXT: s_cselect_b32 s6, s7, s6
; GFX6-NEXT: s_add_i32 s5, s5, s6
; GFX6-NEXT: s_mov_b32 s6, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s6
; GFX6-NEXT: s_and_b32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s6
; GFX6-NEXT: s_and_b32 s2, s3, s6
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s3, s5, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_uaddsat_v6i16:
@ -2466,36 +2415,27 @@ define <4 x float> @v_uaddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX6-NEXT: v_xor_b32_e32 v9, -1, v6
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15
; GFX6-NEXT: v_xor_b32_e32 v9, -1, v7
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_and_b32_e32 v3, s4, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v4
; GFX6-NEXT: v_and_b32_e32 v4, s4, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_and_b32_e32 v3, s4, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7
; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v3, v6, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v8i16:
@ -2603,24 +2543,15 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s7, s7, s8
; GFX6-NEXT: s_mov_b32 s8, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s8
; GFX6-NEXT: s_and_b32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s8
; GFX6-NEXT: s_and_b32 s2, s3, s8
; GFX6-NEXT: s_and_b32 s3, s5, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s8
; GFX6-NEXT: s_and_b32 s4, s7, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s6, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_uaddsat_v8i16:


@ -199,10 +199,7 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_min_u32_e32 v2, v1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_mov_b32_e32 v2, 0xff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_and_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v0, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
@ -217,10 +214,9 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT: v_sub_u16_e64 v1, v3, v2 clamp
; GFX8-NEXT: v_mov_b32_e32 v2, 0xff
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v2i8:
@ -283,10 +279,7 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_cmp_lt_u32 s1, s2
; GFX6-NEXT: s_cselect_b32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_movk_i32 s2, 0xff
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@ -303,11 +296,10 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: s_lshl_b32 s0, s2, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v2, 0xff
; GFX8-NEXT: v_sub_u16_e64 v1, s0, v1 clamp
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
@ -386,25 +378,20 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v6
; GFX6-NEXT: v_min_u32_e32 v3, v2, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: s_movk_i32 s4, 0xff
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_min_u32_e32 v4, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 24, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@ -549,17 +536,12 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_cmp_lt_u32 s3, s4
; GFX6-NEXT: s_cselect_b32 s4, s3, s4
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: s_movk_i32 s4, 0xff
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s3, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s1, s3, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
@ -1802,10 +1784,7 @@ define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-NEXT: s_cmp_lt_u32 s1, s2
; GFX6-NEXT: s_cselect_b32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_mov_b32 s2, 0xffff
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@ -1854,10 +1833,7 @@ define amdgpu_ps float @usubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_min_u32_e32 v1, s0, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s0, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v0, s0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
@ -1900,10 +1876,7 @@ define amdgpu_ps float @usubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX6-NEXT: v_min_u32_e32 v2, s0, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s0, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v0, s0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
@ -1965,19 +1938,14 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7
; GFX6-NEXT: v_min_u32_e32 v4, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v4i16:
@ -2040,16 +2008,11 @@ define amdgpu_ps <2 x i32> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s3, s4
; GFX6-NEXT: s_cselect_b32 s4, s3, s4
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s4
; GFX6-NEXT: s_and_b32 s2, s3, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_usubsat_v4i16:
@ -2133,29 +2096,22 @@ define <3 x float> @v_usubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v10
; GFX6-NEXT: v_min_u32_e32 v6, v4, v6
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_u32_e32 v6, v5, v6
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_and_b32_e32 v3, s4, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v6i16:
@ -2237,20 +2193,13 @@ define amdgpu_ps <3 x i32> @s_usubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s5, s6
; GFX6-NEXT: s_cselect_b32 s6, s5, s6
; GFX6-NEXT: s_sub_i32 s5, s5, s6
; GFX6-NEXT: s_mov_b32 s6, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s6
; GFX6-NEXT: s_and_b32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s6
; GFX6-NEXT: s_and_b32 s2, s3, s6
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s3, s5, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_usubsat_v6i16:
@ -2344,36 +2293,27 @@ define <4 x float> @v_usubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v14
; GFX6-NEXT: v_min_u32_e32 v8, v6, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_u32_e32 v8, v7, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_and_b32_e32 v3, s4, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v4
; GFX6-NEXT: v_and_b32_e32 v4, s4, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_and_b32_e32 v3, s4, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7
; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v3, v6, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v8i16:
@ -2473,24 +2413,15 @@ define amdgpu_ps <4 x i32> @s_usubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s7, s8
; GFX6-NEXT: s_cselect_b32 s8, s7, s8
; GFX6-NEXT: s_sub_i32 s7, s7, s8
; GFX6-NEXT: s_mov_b32 s8, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s8
; GFX6-NEXT: s_and_b32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s8
; GFX6-NEXT: s_and_b32 s2, s3, s8
; GFX6-NEXT: s_and_b32 s3, s5, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s8
; GFX6-NEXT: s_and_b32 s4, s7, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s6, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_usubsat_v8i16:


@ -37,7 +37,6 @@ define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> in
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_mov_b32 s3, s2
; GFX8-NEXT: s_and_b32 s0, s0, s2
; GFX8-NEXT: s_and_b32 s1, s1, s2
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
@ -118,21 +117,19 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_mov_b32 s4, 0xffff
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_and_b32 s2, s0, s4
; GFX8-NEXT: s_and_b32 s0, s1, s4
; GFX8-NEXT: s_and_b32 s1, s5, s4
; GFX8-NEXT: s_mov_b32 s5, s4
; GFX8-NEXT: s_and_b32 s3, s3, s4
; GFX8-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_and_b32 s6, s1, s4
; GFX8-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX8-NEXT: s_and_b64 s[2:3], s[6:7], s[4:5]
; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
; GFX8-NEXT: s_xor_b64 s[6:7], s[0:1], s[4:5]
; GFX8-NEXT: s_and_b32 s1, s2, s4
; GFX8-NEXT: s_lshl_b32 s0, s3, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_lshl_b32 s1, s7, 16
; GFX8-NEXT: s_and_b32 s2, s6, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s4
; GFX8-NEXT: s_or_b32 s0, s1, s0
; GFX8-NEXT: s_lshl_b32 s1, s3, 16
; GFX8-NEXT: s_and_b32 s2, s2, s4
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
;