# Patch summary:
#   * AMDGPUInstructions.td, multiclass BFEPattern: adds an AMDGPUPat that selects
#       (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width)))   i.e.  x & (-1 >> (32 - width))
#     to (UBFE $src, (i32 0), $width), and puts a descriptive comment above the
#     neighbouring shl/srl pattern that already selects the same UBFE form.
#   * extract-lowbits.ll: the checks for bzhi32_c0 and bzhi32_c4_commutative replace the
#     separate SI and VI sub + lshr + and sequences with one shared GCN check that
#     expects a single v_bfe_u32 v0, v0, 0, v1.
# NOTE(review): for width == 32 the matched expression is x & (-1 >> 0), i.e. x itself;
#   confirm that UBFE given a width operand of 32 also yields $src unchanged -- TODO verify
#   against the ISA manual before relying on this pattern for that boundary value.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 4d7da21a591c..22a7a9c44a3c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -682,6 +682,13 @@ multiclass BFEPattern { (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask)))) >; + // x & (-1 >> (bitwidth - y)) + def : AMDGPUPat < + (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))), + (UBFE $src, (i32 0), $width) + >; + + // x << (bitwidth - y) >> (bitwidth - y) def : AMDGPUPat < (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)), (UBFE $src, (i32 0), $width) diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll index 563756271f36..150b064569a5 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -153,21 +153,11 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ---------------------------------------------------------------------------- ; define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { -; SI-LABEL: bzhi32_c0: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 -; SI-NEXT: v_lshr_b32_e32 v1, -1, v1 -; SI-NEXT: v_and_b32_e32 v0, v1, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: bzhi32_c0: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 -; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1 -; VI-NEXT: v_and_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: bzhi32_c0: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits %masked = and i32 %mask, %val @@ -200,21 +190,11 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { } define i32 
@bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { -; SI-LABEL: bzhi32_c4_commutative: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 -; SI-NEXT: v_lshr_b32_e32 v1, -1, v1 -; SI-NEXT: v_and_b32_e32 v0, v0, v1 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: bzhi32_c4_commutative: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 -; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1 -; VI-NEXT: v_and_b32_e32 v0, v0, v1 -; VI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: bzhi32_c4_commutative: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits %masked = and i32 %val, %mask ; swapped order