forked from OSchip/llvm-project
AMDGPU: Add another BFE pattern
This is the pattern that falls out of the instruction's definition if offset == 0. llvm-svn: 295912
This commit is contained in:
parent
d0786099b1
commit
a9e16e6597
|
@ -71,6 +71,40 @@ def u8imm : Operand<i8> {
|
|||
//===--------------------------------------------------------------------===//
|
||||
def brtarget : Operand<OtherVT>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Misc. PatFrags
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Pattern fragment wrapping a two-operand node `op`. The fragment only
// matches when the predicate N->hasOneUse() returns true for the matched node.
class HasOneUseBinOp<SDPatternOperator op>
  : PatFrag<(ops node:$lhs, node:$rhs),
            (op $lhs, $rhs),
            [{ return N->hasOneUse(); }]>;
|
||||
|
||||
// Three-operand counterpart of the single-use fragment: wraps node `op` and
// only matches when N->hasOneUse() returns true for the matched node.
class HasOneUseTernaryOp<SDPatternOperator op>
  : PatFrag<(ops node:$op0, node:$op1, node:$op2),
            (op $op0, $op1, $op2),
            [{ return N->hasOneUse(); }]>;
|
||||
|
||||
|
||||
// Single-use fragments for binary nodes. Every def inside this `let` is
// given the SDNPCommutative and SDNPAssociative properties.
let Properties = [SDNPCommutative, SDNPAssociative] in {
  // Integer min/max.
  def smax_oneuse    : HasOneUseBinOp<smax>;
  def smin_oneuse    : HasOneUseBinOp<smin>;
  def umax_oneuse    : HasOneUseBinOp<umax>;
  def umin_oneuse    : HasOneUseBinOp<umin>;
  // Floating-point min/max.
  def fminnum_oneuse : HasOneUseBinOp<fminnum>;
  def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
  // Bitwise logic.
  def and_oneuse     : HasOneUseBinOp<and>;
  def or_oneuse      : HasOneUseBinOp<or>;
  def xor_oneuse     : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

// Single-use fragments declared outside the `let`, so they do not receive
// the commutative/associative properties.
def sub_oneuse : HasOneUseBinOp<sub>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PatLeafs for floating-point comparisons
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -156,22 +190,6 @@ def COND_NULL : PatLeaf <
|
|||
>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Misc. PatFrags
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
|
||||
(ops node:$src0, node:$src1),
|
||||
(op $src0, $src1),
|
||||
[{ return N->hasOneUse(); }]
|
||||
>;
|
||||
|
||||
class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
|
||||
(ops node:$src0, node:$src1, node:$src2),
|
||||
(op $src0, $src1, $src2),
|
||||
[{ return N->hasOneUse(); }]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load/Store Pattern Fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -608,10 +626,22 @@ def IMMPopCount : SDNodeXForm<imm, [{
|
|||
MVT::i32);
|
||||
}]>;
|
||||
|
||||
class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
|
||||
(i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
|
||||
(BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
|
||||
>;
|
||||
// Selection patterns that map shift/mask idioms onto bitfield-extract
// instructions.
//   UBFE - instruction selected for the and/srl and shl/srl forms
//   SBFE - instruction selected for the shl/sra form
//   MOV  - instruction wrapping the width derived from the mask immediate
multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
  // (val >> offset) & mask, where the mask satisfies
  // IMMZeroBasedBitfieldMask. The width operand is IMMPopCount of the mask.
  def : Pat <
    (i32 (and (i32 (srl i32:$val, i32:$offset)),
              IMMZeroBasedBitfieldMask:$bits)),
    (UBFE $val, $offset, (MOV (i32 (IMMPopCount $bits))))
  >;

  // (val << (32 - nbits)) >>u (32 - nbits), selected with an offset operand
  // of 0. The inner shl must have a single use (shl_oneuse).
  // NOTE(review): for $nbits == 32 both shift amounts are 0 and the source
  // expression is the identity; confirm the selected instruction treats a
  // width operand of 32 as "all bits".
  def : Pat <
    (srl (shl_oneuse i32:$val, (sub 32, i32:$nbits)), (sub 32, i32:$nbits)),
    (UBFE $val, (i32 0), $nbits)
  >;

  // Same shape with an arithmetic right shift, selected to SBFE.
  def : Pat <
    (sra (shl_oneuse i32:$val, (sub 32, i32:$nbits)), (sub 32, i32:$nbits)),
    (SBFE $val, (i32 0), $nbits)
  >;
}
|
||||
|
||||
// rotr pattern
|
||||
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
|
||||
|
@ -630,22 +660,6 @@ class IntMed3Pat<Instruction med3Inst,
|
|||
(med3Inst $src0, $src1, $src2)
|
||||
>;
|
||||
|
||||
let Properties = [SDNPCommutative, SDNPAssociative] in {
|
||||
def smax_oneuse : HasOneUseBinOp<smax>;
|
||||
def smin_oneuse : HasOneUseBinOp<smin>;
|
||||
def umax_oneuse : HasOneUseBinOp<umax>;
|
||||
def umin_oneuse : HasOneUseBinOp<umin>;
|
||||
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
|
||||
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
|
||||
def and_oneuse : HasOneUseBinOp<and>;
|
||||
def or_oneuse : HasOneUseBinOp<or>;
|
||||
def xor_oneuse : HasOneUseBinOp<xor>;
|
||||
} // Properties = [SDNPCommutative, SDNPAssociative]
|
||||
|
||||
def sub_oneuse : HasOneUseBinOp<sub>;
|
||||
|
||||
def select_oneuse : HasOneUseTernaryOp<select>;
|
||||
|
||||
// Special conversion patterns
|
||||
|
||||
def cvt_rpi_i32_f32 : PatFrag <
|
||||
|
|
|
@ -388,7 +388,7 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
|
|||
VecALU
|
||||
>;
|
||||
|
||||
def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
|
||||
// Instantiate the BFEPattern multiclass with BFE_UINT_eg as UBFE,
// BFE_INT_eg as SBFE and MOV_IMM_I32 as MOV.
defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
|
||||
|
||||
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
|
||||
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
|
||||
|
|
|
@ -1069,8 +1069,7 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
|
|||
|
||||
defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
|
||||
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
|
||||
|
||||
def : BFEPattern <V_BFE_U32, S_MOV_B32>;
|
||||
// Instantiate the BFEPattern multiclass with V_BFE_U32 as UBFE,
// V_BFE_I32 as SBFE and S_MOV_B32 as MOV.
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
|
||||
|
||||
def : Pat<
|
||||
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; Zero-extending extract of the low %width bits, written as
; shl/lshr by (32 - %width), with both operands loaded per work-item.
; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer; previously this re-read
  ; %in0.gep and left %in1.gep unused.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
|
||||
|
||||
; Same shl/lshr shape as the single-use test, but the shl result is also
; stored, so the checks expect separate sub/shift instructions.
; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer; previously this re-read
  ; %in0.gep and left %in1.gep unused.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of the shl result.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
|
||||
|
||||
; Zero-extending shl/lshr by (32 - %width) where the source and width are
; kernel arguments rather than per-work-item loads.
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = lshr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ret void
}
|
||||
|
||||
; Kernel-argument form where the shl result has a second use (it is also
; stored), so the checks expect separate sub/shl/lshr instructions.
; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = lshr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ; Extra use of the shl result.
  store volatile i32 %shifted, i32 addrspace(1)* undef
  ret void
}
|
||||
|
||||
; Sign-extending extract of the low %width bits, written as
; shl/ashr by (32 - %width), with both operands loaded per work-item.
; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer; previously this re-read
  ; %in0.gep and left %in1.gep unused.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
|
||||
|
||||
; Same shl/ashr shape as the single-use test, but the shl result is also
; stored, so the checks expect separate sub/shift instructions.
; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer; previously this re-read
  ; %in0.gep and left %in1.gep unused.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of the shl result.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
|
||||
|
||||
; Sign-extending shl/ashr by (32 - %width) where the source and width are
; kernel arguments rather than per-work-item loads.
; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = ashr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ret void
}
|
||||
|
||||
; Kernel-argument form where the shl result has a second use (it is also
; stored), so the checks expect separate sub/shl/ashr instructions.
; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = ashr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ; Extra use of the shl result.
  store volatile i32 %shifted, i32 addrspace(1)* undef
  ret void
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
Loading…
Reference in New Issue