forked from OSchip/llvm-project
AMDGPU: Select BFI patterns with 64-bit ints
llvm-svn: 324431
This commit is contained in:
parent
258f059f88
commit
a18b3bcf51
|
@ -570,6 +570,18 @@ multiclass BFIPatterns <Instruction BFI_INT,
|
|||
(BFI_INT $x, $y, $z)
|
||||
>;
|
||||
|
||||
// 64-bit version of (y & x) | (z & ~x): emit one BFI_INT per 32-bit half
// (sub0 first, then sub1) and join the two results into RC64 with
// REG_SEQUENCE.
|
||||
def : AMDGPUPat <
|
||||
(or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
|
||||
(REG_SEQUENCE RC64,
|
||||
(BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub0)),
|
||||
(i32 (EXTRACT_SUBREG $z, sub0))), sub0,
|
||||
(BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub1)),
|
||||
(i32 (EXTRACT_SUBREG $z, sub1))), sub1)
|
||||
>;
|
||||
|
||||
// SHA-256 Ch function
|
||||
// z ^ (x & (y ^ z))
|
||||
def : AMDGPUPat <
|
||||
|
@ -577,6 +589,18 @@ multiclass BFIPatterns <Instruction BFI_INT,
|
|||
(BFI_INT $x, $y, $z)
|
||||
>;
|
||||
|
||||
// 64-bit version of z ^ (x & (y ^ z)): emit one BFI_INT per 32-bit half
// (sub0 first, then sub1) and join the two results into RC64 with
// REG_SEQUENCE.
|
||||
def : AMDGPUPat <
|
||||
(xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
|
||||
(REG_SEQUENCE RC64,
|
||||
(BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub0)),
|
||||
(i32 (EXTRACT_SUBREG $z, sub0))), sub0,
|
||||
(BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub1)),
|
||||
(i32 (EXTRACT_SUBREG $z, sub1))), sub1)
|
||||
>;
|
||||
|
||||
def : AMDGPUPat <
|
||||
(fcopysign f32:$src0, f32:$src1),
|
||||
(BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
|
||||
|
@ -610,10 +634,25 @@ multiclass BFIPatterns <Instruction BFI_INT,
|
|||
// SHA-256 Ma patterns
|
||||
|
||||
// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
|
||||
// Single-pattern class form: matches the i32 expression only and takes no
// 64-bit register class parameter.
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : AMDGPUPat <
|
||||
(or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
|
||||
(BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
|
||||
>;
|
||||
// Multiclass form: defines the i32 pattern plus an i64 pattern whose result
// is assembled in the RC64 register class.
//   BFI_INT - bitfield-insert instruction used for the selection
//   XOR     - 32-bit xor instruction used to form the BFI_INT mask operand
//   RC64    - register class of the 64-bit REG_SEQUENCE result
multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
|
||||
// 32-bit: ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
def : AMDGPUPat <
|
||||
(or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
|
||||
(BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
|
||||
>;
|
||||
|
||||
// 64-bit: apply the same BFI_INT/XOR form to the sub0 and sub1 halves of
// each operand, then recombine the two 32-bit results with REG_SEQUENCE.
def : AMDGPUPat <
|
||||
(or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
|
||||
(REG_SEQUENCE RC64,
|
||||
(BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub0))),
|
||||
(i32 (EXTRACT_SUBREG $z, sub0)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub0))), sub0,
|
||||
(BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub1))),
|
||||
(i32 (EXTRACT_SUBREG $z, sub1)),
|
||||
(i32 (EXTRACT_SUBREG $y, sub1))), sub1)
|
||||
>;
|
||||
}
|
||||
|
||||
// Bitfield extract patterns
|
||||
|
||||
|
|
|
@ -693,7 +693,7 @@ def : EGOrCaymanPat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
|
|||
def : EGOrCaymanPat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
|
||||
|
||||
// SHA-256 Patterns
|
||||
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
|
||||
defm : SHA256MaPattern <BFI_INT_eg, XOR_INT, R600_Reg64>;
|
||||
|
||||
def EG_ExportSwz : ExportSwzInst {
|
||||
let Word1{19-16} = 0; // BURST_COUNT
|
||||
|
|
|
@ -1098,6 +1098,7 @@ let SubtargetPredicate = isGCN in {
|
|||
def : IMad24Pat<V_MAD_I32_I24, 1>;
|
||||
def : UMad24Pat<V_MAD_U32_U24, 1>;
|
||||
|
||||
// FIXME: This should only be done for VALU inputs
|
||||
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
|
||||
def : ROTRPattern <V_ALIGNBIT_B32>;
|
||||
|
||||
|
@ -1487,7 +1488,7 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
|
|||
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
|
||||
|
||||
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
|
||||
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
|
||||
defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
|
||||
|
||||
def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
|
||||
def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s
|
||||
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -check-prefixes=GCN,FUNC %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=GCN,FUNC %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck -check-prefixes=R600,FUNC %s
|
||||
|
||||
; BFI_INT Definition pattern from ISA docs
|
||||
; (y & x) | (z & ~x)
|
||||
;
|
||||
; R600: {{^}}bfi_def:
|
||||
; FUNC-LABEL: {{^}}bfi_def:
|
||||
; R600: BFI_INT
|
||||
; SI: @bfi_def
|
||||
; SI: v_bfi_b32
|
||||
|
||||
; GCN: v_bfi_b32
|
||||
define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
%0 = xor i32 %x, -1
|
||||
|
@ -21,10 +21,10 @@ entry:
|
|||
|
||||
; SHA-256 Ch function
|
||||
; z ^ (x & (y ^ z))
|
||||
; R600: {{^}}bfi_sha256_ch:
|
||||
; FUNC-LABEL: {{^}}bfi_sha256_ch:
|
||||
; R600: BFI_INT
|
||||
; SI: @bfi_sha256_ch
|
||||
; SI: v_bfi_b32
|
||||
|
||||
; GCN: v_bfi_b32
|
||||
define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
%0 = xor i32 %y, %z
|
||||
|
@ -36,12 +36,12 @@ entry:
|
|||
|
||||
; SHA-256 Ma function
|
||||
; ((x & z) | (y & (x | z)))
|
||||
; R600: {{^}}bfi_sha256_ma:
|
||||
; FUNC-LABEL: {{^}}bfi_sha256_ma:
|
||||
; R600: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
|
||||
; R600: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
|
||||
; SI: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
|
||||
; SI: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
|
||||
|
||||
; GCN: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
|
||||
; GCN: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
|
||||
define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
%0 = and i32 %x, %z
|
||||
|
@ -51,3 +51,137 @@ entry:
|
|||
store i32 %3, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Vector bitselect written as ((a ^ mask) & b) ^ mask on <2 x i32>.
; The checks expect two consecutive v_bfi_b32 (one per element) directly
; between the s_waitcnt and the s_setpc_b64.
; FUNC-LABEL: {{^}}v_bitselect_v2i32_pat1:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
|
||||
; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
|
||||
%xor.0 = xor <2 x i32> %a, %mask
|
||||
%and = and <2 x i32> %xor.0, %b
|
||||
%bitselect = xor <2 x i32> %and, %mask
|
||||
ret <2 x i32> %bitselect
|
||||
}
|
||||
|
||||
; i64 bitselect written as (a & b) | (~a & mask).
; The checks expect two consecutive v_bfi_b32 (high half in v1, then low half
; in v0) directly between the s_waitcnt and the s_setpc_b64.
; FUNC-LABEL: {{^}}v_bitselect_i64_pat_0:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: v_bfi_b32 v1, v1, v3, v5
|
||||
; GCN-NEXT: v_bfi_b32 v0, v0, v2, v4
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
|
||||
%and0 = and i64 %a, %b
|
||||
%not.a = xor i64 %a, -1
|
||||
%and1 = and i64 %not.a, %mask
|
||||
%bitselect = or i64 %and0, %and1
|
||||
ret i64 %bitselect
|
||||
}
|
||||
|
||||
; i64 bitselect written as ((a ^ mask) & b) ^ mask.
; The checks expect two consecutive v_bfi_b32 (v1 first, then v0) directly
; between the s_waitcnt and the s_setpc_b64.
; FUNC-LABEL: {{^}}v_bitselect_i64_pat_1:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
|
||||
; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
|
||||
%xor.0 = xor i64 %a, %mask
|
||||
%and = and i64 %xor.0, %b
|
||||
%bitselect = xor i64 %and, %mask
|
||||
ret i64 %bitselect
|
||||
}
|
||||
|
||||
; i64 bitselect written as ((a ^ mask) & b) ^ mask.
; The two v_bfi_b32 checks use the DAG form, so either emission order passes.
; NOTE(review): the IR body is the same as v_bitselect_i64_pat_1; only the
; check ordering differs. Confirm whether a different expression was intended.
; FUNC-LABEL: {{^}}v_bitselect_i64_pat_2:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-DAG: v_bfi_b32 v0, v2, v0, v4
|
||||
; GCN-DAG: v_bfi_b32 v1, v3, v1, v5
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
|
||||
%xor.0 = xor i64 %a, %mask
|
||||
%and = and i64 %xor.0, %b
|
||||
%bitselect = xor i64 %and, %mask
|
||||
ret i64 %bitselect
|
||||
}
|
||||
|
||||
; i64 form of the SHA-256 Ma expression (x & z) | (y & (x | z)).
; The checks expect one v_xor_b32 and one v_bfi_b32 per 32-bit half; all four
; use the DAG form, so their relative order is not constrained.
; FUNC-LABEL: {{^}}v_bfi_sha256_ma_i64:
|
||||
; GCN-DAG: v_xor_b32_e32 v1, v1, v3
|
||||
; GCN-DAG: v_xor_b32_e32 v0, v0, v2
|
||||
; GCN-DAG: v_bfi_b32 v1, v1, v5, v3
|
||||
; GCN-DAG: v_bfi_b32 v0, v0, v4, v2
|
||||
define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
|
||||
entry:
|
||||
%and0 = and i64 %x, %z
|
||||
%or0 = or i64 %x, %z
|
||||
%and1 = and i64 %y, %or0
|
||||
%or1 = or i64 %and0, %and1
|
||||
ret i64 %or1
|
||||
}
|
||||
|
||||
; Kernel variant of the (a & b) | (~a & mask) bitselect: the operands are
; kernel arguments and the result feeds an i64 add that is then stored.
; The checks currently expect v_mov_b32_e32 copies from s registers ahead of
; each v_bfi_b32.
; NOTE(review): the add named %scalar.use presumably exists to give the
; bitselect a scalar user - confirm.
; FIXME: Should leave as 64-bit SALU ops
|
||||
; FUNC-LABEL: {{^}}s_bitselect_i64_pat_0:
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_bfi_b32
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_bfi_b32
|
||||
define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
|
||||
%and0 = and i64 %a, %b
|
||||
%not.a = xor i64 %a, -1
|
||||
%and1 = and i64 %not.a, %mask
|
||||
%bitselect = or i64 %and0, %and1
|
||||
%scalar.use = add i64 %bitselect, 10
|
||||
store i64 %scalar.use, i64 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel variant of the ((a ^ mask) & b) ^ mask bitselect: the operands are
; kernel arguments and the result feeds an i64 add that is then stored.
; The checks expect v_mov_b32_e32 copies from s registers and two v_bfi_b32;
; the first v_bfi_b32 and the third copy may appear in either order.
; FUNC-LABEL: {{^}}s_bitselect_i64_pat_1:
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN-DAG: v_bfi_b32
|
||||
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_bfi_b32
|
||||
define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
|
||||
%xor.0 = xor i64 %a, %mask
|
||||
%and = and i64 %xor.0, %b
|
||||
%bitselect = xor i64 %and, %mask
|
||||
|
||||
|
||||
%scalar.use = add i64 %bitselect, 10
|
||||
store i64 %scalar.use, i64 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel variant of the ((a ^ mask) & b) ^ mask bitselect: the operands are
; kernel arguments and the result feeds an i64 add that is then stored.
; The checks expect v_mov_b32_e32 copies from s registers and two v_bfi_b32;
; the first v_bfi_b32 and the third copy may appear in either order.
; NOTE(review): the IR body and checks are the same as s_bitselect_i64_pat_1.
; Confirm whether a different expression was intended.
; FUNC-LABEL: {{^}}s_bitselect_i64_pat_2:
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN-DAG: v_bfi_b32
|
||||
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN: v_bfi_b32
|
||||
define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
|
||||
%xor.0 = xor i64 %a, %mask
|
||||
%and = and i64 %xor.0, %b
|
||||
%bitselect = xor i64 %and, %mask
|
||||
|
||||
|
||||
%scalar.use = add i64 %bitselect, 10
|
||||
store i64 %scalar.use, i64 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel variant of the i64 SHA-256 Ma expression (x & z) | (y & (x | z)):
; the operands are kernel arguments and the result feeds an i64 add that is
; then stored.
; The checks expect v_mov_b32_e32 copies from s registers, two v_xor_b32 and
; two v_bfi_b32; the middle group uses the DAG form and may be reordered.
; FUNC-LABEL: {{^}}s_bfi_sha256_ma_i64:
|
||||
; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN-DAG: v_xor_b32
|
||||
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s
|
||||
; GCN-DAG: v_xor_b32
|
||||
; GCN-DAG: v_bfi_b32
|
||||
; GCN: v_bfi_b32
|
||||
define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
|
||||
entry:
|
||||
%and0 = and i64 %x, %z
|
||||
%or0 = or i64 %x, %z
|
||||
%and1 = and i64 %y, %or0
|
||||
%or1 = or i64 %and0, %and1
|
||||
|
||||
|
||||
%scalar.use = add i64 %or1, 10
|
||||
store i64 %scalar.use, i64 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue