[AMDGPU] Add selection pattern for v_xnor_b32

This allows GlobalISel to use this instruction where available. I assume
SelectionDAG always selects s_xnor_b32 so it isn't affected by this
change.

Differential Revision: https://reviews.llvm.org/D97560
This commit is contained in:
Jay Foad 2021-02-26 16:31:11 +00:00
parent 21bb63893e
commit dc2259537a
3 changed files with 128 additions and 29 deletions

View File

@ -636,6 +636,11 @@ def add_ctpop : PatFrag <
(add (ctpop $src0), $src1)
>;
def xnor : PatFrag <
(ops node:$src0, node:$src1),
(not (xor $src0, $src1))
>;
foreach I = 1-4 in {
def shl#I#_add : PatFrag <
(ops node:$src0, node:$src1),

View File

@ -678,7 +678,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
let SubtargetPredicate = HasDLInsts in {
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",

View File

@ -166,12 +166,32 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
}
define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
; GCN-LABEL: vector_xnor_i32_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX7-LABEL: vector_xnor_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xnor_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xnor_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xnor_i32_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i32 %a, %b
%r = xor i32 %xor, -1
@ -194,11 +214,28 @@ entry:
}
define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
; GCN-LABEL: xnor_s_v_i32_one_use:
; GCN: ; %bb.0:
; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: ; return to shader part epilog
; GFX7-LABEL: xnor_s_v_i32_one_use:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_s_v_i32_one_use:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_s_v_i32_one_use:
; GFX900: ; %bb.0:
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_s_v_i32_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
; GFX906-NEXT: ; return to shader part epilog
%xor = xor i32 %s, %v
%d = xor i32 %xor, -1
%cast = bitcast i32 %d to float
@ -206,11 +243,28 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
}
define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
; GCN-LABEL: xnor_v_s_i32_one_use:
; GCN: ; %bb.0:
; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: ; return to shader part epilog
; GFX7-LABEL: xnor_v_s_i32_one_use:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_v_s_i32_one_use:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_v_s_i32_one_use:
; GFX900: ; %bb.0:
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_v_s_i32_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
; GFX906-NEXT: ; return to shader part epilog
%xor = xor i32 %v, %s
%d = xor i32 %xor, -1
%cast = bitcast i32 %d to float
@ -305,12 +359,32 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
}
define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
; GCN-LABEL: vector_xor_na_b_i32_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX7-LABEL: vector_xor_na_b_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_na_b_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_na_b_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xor_na_b_i32_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT: s_setpc_b64 s[30:31]
entry:
%na = xor i32 %a, -1
%r = xor i32 %na, %b
@ -318,12 +392,32 @@ entry:
}
define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
; GCN-LABEL: vector_xor_a_nb_i32_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX7-LABEL: vector_xor_a_nb_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_a_nb_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_a_nb_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xor_a_nb_i32_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v1, v0
; GFX906-NEXT: s_setpc_b64 s[30:31]
entry:
%nb = xor i32 %b, -1
%r = xor i32 %a, %nb