AMDGPU/GlobalISel: Fix test failure in release build

The annoying behavior where the output is different due to the
legality check struck again, plus the subtarget predicate wasn't
really correctly set for DS FP atomics.

Some of the FP min/max instructions seem to be in the gfx6/gfx7
manuals, but IIRC this might have been one of the cases where the
manual got ahead of the actual hardware support, but I've left these
as-is for now since the assembler tests seem to expect them.
This commit is contained in:
Matt Arsenault 2020-06-06 10:31:08 -04:00
parent f14d4c9c54
commit 38fb446fc7
3 changed files with 43 additions and 23 deletions

View File

@ -1048,6 +1048,9 @@ def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>; AssemblerPredicate<(all_of FeatureGFX9Insts)>;
def HasLDSFPAtomics : Predicate<"Subtarget->hasLDSFPAtomics()">,
AssemblerPredicate<(all_of FeatureGFX8Insts)>;
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">, def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>; AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;

View File

@ -388,7 +388,12 @@ defm DS_MAX_U32 : DS_1A1D_NORET_mc<"ds_max_u32">;
defm DS_AND_B32 : DS_1A1D_NORET_mc<"ds_and_b32">; defm DS_AND_B32 : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32 : DS_1A1D_NORET_mc<"ds_or_b32">; defm DS_OR_B32 : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32 : DS_1A1D_NORET_mc<"ds_xor_b32">; defm DS_XOR_B32 : DS_1A1D_NORET_mc<"ds_xor_b32">;
let SubtargetPredicate = HasLDSFPAtomics in {
defm DS_ADD_F32 : DS_1A1D_NORET_mc<"ds_add_f32">; defm DS_ADD_F32 : DS_1A1D_NORET_mc<"ds_add_f32">;
}
// FIXME: Are these really present pre-gfx8?
defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">; defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">; defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">;
@ -443,7 +448,10 @@ defm DS_MIN_F64 : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>; defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">; defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
let SubtargetPredicate = HasLDSFPAtomics in {
defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">; defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">;
}
defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">; defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">; defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">; defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
@ -609,10 +617,12 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
int_amdgcn_ds_bpermute>; int_amdgcn_ds_bpermute>;
} }
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
} // let SubtargetPredicate = isGFX8Plus } // let SubtargetPredicate = isGFX8Plus
let SubtargetPredicate = HasLDSFPAtomics in {
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// DS Patterns // DS Patterns
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -830,9 +840,12 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">; defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">; defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">; defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
let SubtargetPredicate = HasLDSFPAtomics in {
defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">; defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">; defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">; defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
}
// 64-bit atomics. // 64-bit atomics.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">; defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;

View File

@ -1,10 +1,11 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# GFX6/7 selection should fail.
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
--- ---
name: atomicrmw_fadd_s32_local name: atomicrmw_fadd_s32_local
@ -15,12 +16,6 @@ body: |
bb.0: bb.0:
liveins: $vgpr0, $vgpr1 liveins: $vgpr0, $vgpr1
; GFX6-LABEL: name: atomicrmw_fadd_s32_local
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
; GFX8-LABEL: name: atomicrmw_fadd_s32_local ; GFX8-LABEL: name: atomicrmw_fadd_s32_local
; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: liveins: $vgpr0, $vgpr1
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -34,6 +29,13 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3)
; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
; GFX6-LABEL: name: atomicrmw_fadd_s32_local
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
%0:vgpr(p3) = COPY $vgpr0 %0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1 %1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3) %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3)
@ -50,11 +52,6 @@ body: |
bb.0: bb.0:
liveins: $vgpr0, $vgpr1 liveins: $vgpr0, $vgpr1
; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
; GFX8-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_noret
; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: liveins: $vgpr0, $vgpr1
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -66,6 +63,12 @@ body: |
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3)
; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
%0:vgpr(p3) = COPY $vgpr0 %0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1 %1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3) %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3)
@ -81,14 +84,6 @@ body: |
bb.0: bb.0:
liveins: $vgpr0, $vgpr1 liveins: $vgpr0, $vgpr1
; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
; GFX8-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_gep4
; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: liveins: $vgpr0, $vgpr1
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -102,6 +97,15 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3)
; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3)
; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
%0:vgpr(p3) = COPY $vgpr0 %0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1 %1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_CONSTANT i32 4 %2:vgpr(s32) = G_CONSTANT i32 4