forked from OSchip/llvm-project
[AMDGPU] Allow abs/neg source modifiers on v_cndmask_b32
Summary:
D59191 added support for these modifiers in the assembler and disassembler. This patch just teaches instruction selection that it can use them.

Reviewers: arsenm, tstellar

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64497

llvm-svn: 365640
This commit is contained in:
parent
a23c5694fb
commit
bba37e89a5
|
@ -746,17 +746,18 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
|
|||
let SubtargetPredicate = Has16BitInsts;
|
||||
}
|
||||
|
||||
multiclass SelectPat <ValueType vt, Instruction inst> {
|
||||
multiclass SelectPat <ValueType vt> {
|
||||
def : GCNPat <
|
||||
(vt (select i1:$src0, vt:$src1, vt:$src2)),
|
||||
(inst (i32 0), $src2, (i32 0), $src1, $src0)
|
||||
(vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
|
||||
(VOP3Mods vt:$src2, i32:$src2_mods))),
|
||||
(V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
|
||||
>;
|
||||
}
|
||||
|
||||
defm : SelectPat <i16, V_CNDMASK_B32_e64>;
|
||||
defm : SelectPat <i32, V_CNDMASK_B32_e64>;
|
||||
defm : SelectPat <f16, V_CNDMASK_B32_e64>;
|
||||
defm : SelectPat <f32, V_CNDMASK_B32_e64>;
|
||||
defm : SelectPat <i16>;
|
||||
defm : SelectPat <i32>;
|
||||
defm : SelectPat <f16>;
|
||||
defm : SelectPat <f32>;
|
||||
|
||||
let AddedComplexity = 1 in {
|
||||
def : GCNPat <
|
||||
|
|
|
@ -217,8 +217,8 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %
|
|||
; This one asserted with -enable-no-signed-zeros-fp-math
|
||||
; GCN-LABEL: {{^}}fneg_fadd_0:
|
||||
; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
|
||||
; GCN-SAFE-DAG: v_xor_b32_e32 [[B:v[0-9]+]], 0x80000000
|
||||
; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
|
||||
; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
|
||||
; GCN-NSZ-DAG: v_mac_f32_e32 [[C:v[0-9]+]],
|
||||
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[C]]
|
||||
|
||||
|
|
|
@ -101,8 +101,7 @@ define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_ABS]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
|
||||
define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -120,8 +119,7 @@ define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
|
||||
; GCN: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
|
||||
define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -168,9 +166,8 @@ define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
|
||||
; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
|
||||
define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -188,9 +185,8 @@ define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000
|
||||
|
||||
; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[FABS_X]], vcc
|
||||
; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
|
||||
define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -339,8 +335,7 @@ define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_NEG]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
|
||||
define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -514,9 +509,7 @@ define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_or_b32_e32 [[X_NEG_ABS:v[0-9]+]], 0x80000000, [[X]]
|
||||
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG_ABS]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
|
||||
define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -537,9 +530,7 @@ define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_or_b32_e32 [[Y_NEG_ABS:v[0-9]+]], 0x80000000, [[Y]]
|
||||
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG_ABS]], [[X_ABS]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
|
||||
define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -560,9 +551,7 @@ define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
|
||||
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
|
||||
define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -582,9 +571,7 @@ define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN-DAG: v_xor_b32_e32 [[Y_NEG:v[0-9]+]], 0x80000000, [[Y]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG]], [[X_ABS]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
|
||||
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
|
||||
define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -604,8 +591,7 @@ define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
|
||||
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
|
||||
define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -626,8 +612,7 @@ define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[X_ABS]], [[Y]], vcc
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
|
||||
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
|
||||
define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -647,9 +632,8 @@ define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_cmp_eq_u32_e64 vcc,
|
||||
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
|
||||
; GCN-DAG: v_cmp_eq_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
|
||||
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
|
||||
define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
@ -667,10 +651,8 @@ define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
|
|||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
|
||||
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
|
||||
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
|
||||
; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
|
||||
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
|
||||
define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
|
|
|
@ -24,8 +24,7 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
|
|||
; GCN-DAG: v_cmp_eq_u64
|
||||
; GCN-DAG: v_cmp_gt_u64
|
||||
|
||||
; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],
|
||||
; GCN: v_cndmask_b32_e64 [[SIGN_SEL:v[0-9]+]], v{{[0-9]+}}, -v{{[0-9]+}}
|
||||
; GCN: v_cvt_f16_f32_e32 [[SIGN_SEL_F16:v[0-9]+]], [[SIGN_SEL]]
|
||||
; GCN: {{buffer|flat}}_store_short {{.*}}[[SIGN_SEL_F16]]
|
||||
define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
|
||||
|
@ -59,8 +58,7 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
|
|||
; GCN-DAG: v_cmp_eq_u64
|
||||
; GCN-DAG: v_cmp_gt_u64
|
||||
|
||||
; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],
|
||||
; GCN: v_cndmask_b32_e64 [[SIGN_SEL:v[0-9]+]], v{{[0-9]+}}, -v{{[0-9]+}}
|
||||
; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]]
|
||||
define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
Loading…
Reference in New Issue