[AMDGPU] Recognize x & (-1 >> (32 - y)) pattern.

Summary:
D47980 will canonicalize `x << (32 - y) >> (32 - y)`,
which is the pattern the AMDGPU backend currently expects, to `x & (-1 >> (32 - y))`,
which is not yet recognized by AMDGPU.

Thus, it needs to be recognized, too.

Reviewers: nhaehnle, bogner, tstellar, arsenm

Reviewed By: arsenm

Subscribers: arsenm, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #amdgpu

Differential Revision: https://reviews.llvm.org/D48007

llvm-svn: 334815
This commit is contained in:
Roman Lebedev 2018-06-15 09:56:31 +00:00
parent 1503fc0fd0
commit aa8587d1fc
2 changed files with 17 additions and 30 deletions

View File

@ -682,6 +682,13 @@ multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
(UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;
// x & (-1 >> (bitwidth - y))
// Matches an AND of $src with an all-ones value shifted right by
// (32 - $width), and selects it as UBFE with a constant 0 as the second
// operand and $width as the third operand.
// The srl_oneuse fragment presumably restricts the match to a mask that has
// no other users -- TODO confirm against the fragment's definition.
// NOTE(review): when $width == 32 the shift amount is 0 and the mask is
// all-ones, so the source expression yields $src unchanged; confirm that the
// selected UBFE produces the same result for a width of 32.
def : AMDGPUPat <
(and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
(UBFE $src, (i32 0), $width)
>;
// x << (bitwidth - y) >> (bitwidth - y)
def : AMDGPUPat <
(srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
(UBFE $src, (i32 0), $width)

View File

@ -153,21 +153,11 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; ---------------------------------------------------------------------------- ;
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; SI-LABEL: bzhi32_c0:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
; SI-NEXT: v_and_b32_e32 v0, v1, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c0:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT: v_and_b32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: bzhi32_c0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %mask, %val
@ -200,21 +190,11 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
}
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; SI-LABEL: bzhi32_c4_commutative:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
; SI-NEXT: v_and_b32_e32 v0, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c4_commutative:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT: v_and_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: bzhi32_c4_commutative:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %val, %mask ; swapped order