AMDGPU: Change pre-gfx9 implementation of fcanonicalize to mul

If f32 denormals were enabled pre-gfx9, we would still try to
implement this with v_max_f32. Pre-gfx9, these instructions ignored
the denormal mode and did not flush. Switch to the multiply form for
f32 as a workaround which should always work in any case.

This fixes conformance failures when the library implementation of
fmin/fmax were accidentally not inlined, forcing the assumption of no
flushing on targets where denormals are not enabled by default. This
is a workaround, since really we should not be mixing code with
different FP mode expectations, but prefer the lowering that will work
in any mode.

Now this will always use max to implement canonicalize on gfx9+. This
is only really beneficial for f64. For f32/f16 it's a neutral choice
(and worse in terms of code size in 1 case), but possibly worse for
the compiler since it does add an extra register use operand. Leave
this change for later.
This commit is contained in:
Matt Arsenault 2020-04-22 17:34:10 -04:00
parent d987eed91d
commit 89c8c80bd5
10 changed files with 345 additions and 258 deletions

View File

@ -1063,6 +1063,9 @@ def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>; AssemblerPredicate<(all_of FeatureVOP3P)>;
def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">;
def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">;
def HasSDWA : Predicate<"Subtarget->hasSDWA()">, def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>; AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>;

View File

@ -77,6 +77,9 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
def TruePredicate : Predicate<"">; def TruePredicate : Predicate<"">;
// FIXME: Tablegen should specially supports this
def FalsePredicate : Predicate<"false">;
// Add a predicate to the list if does not already exist to deduplicate it. // Add a predicate to the list if does not already exist to deduplicate it.
class PredConcat<list<Predicate> lst, Predicate pred> { class PredConcat<list<Predicate> lst, Predicate pred> {
list<Predicate> ret = list<Predicate> ret =

View File

@ -1878,7 +1878,9 @@ def : GCNPat <
} }
let OtherPredicates = [NoFP16Denormals] in {
// Prefer selecting to max when legal, but using mul is always valid.
let AddedComplexity = -5 in {
def : GCNPat< def : GCNPat<
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
(V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src) (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
@ -1893,23 +1895,7 @@ def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
(V_PK_MUL_F16 0, (i32 CONST.FP16_ONE), $src_mods, $src, DSTCLAMP.NONE) (V_PK_MUL_F16 0, (i32 CONST.FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
>; >;
}
let OtherPredicates = [FP16Denormals] in {
def : GCNPat<
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
(V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0)
>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
(V_PK_MAX_F16 $src_mods, $src, $src_mods, $src, DSTCLAMP.NONE)
>;
}
}
let OtherPredicates = [NoFP32Denormals] in {
def : GCNPat< def : GCNPat<
(fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))), (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
(V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src) (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src)
@ -1919,28 +1905,68 @@ def : GCNPat<
(fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))), (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))),
(V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src) (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src)
>; >;
}
let OtherPredicates = [FP32Denormals] in { // TODO: Handle fneg like other types.
def : GCNPat<
(fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
(V_MAX_F32_e64 $src_mods, $src, $src_mods, $src)
>;
}
let OtherPredicates = [NoFP64Denormals] in {
def : GCNPat< def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
(V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src) (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src)
>; >;
} // End AddedComplexity = -5
multiclass SelectCanonicalizeAsMax<
list<Predicate> f32_preds = [],
list<Predicate> f64_preds = [],
list<Predicate> f16_preds = []> {
def : GCNPat<
(fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
(V_MAX_F32_e64 $src_mods, $src, $src_mods, $src)> {
let OtherPredicates = f32_preds;
}
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
(V_MAX_F64 $src_mods, $src, $src_mods, $src)> {
let OtherPredicates = f64_preds;
}
def : GCNPat<
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
(V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> {
// FIXME: Should have 16-bit inst subtarget predicate
let OtherPredicates = f16_preds;
}
def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
(V_PK_MAX_F16 $src_mods, $src, $src_mods, $src, DSTCLAMP.NONE)> {
// FIXME: Should have VOP3P subtarget predicate
let OtherPredicates = f16_preds;
}
} }
let OtherPredicates = [FP64Denormals] in { // On pre-gfx9 targets, v_max_*/v_min_* did not respect the denormal
def : GCNPat< // mode, and would never flush. For f64, it's faster to do implement
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), // this with a max. For f16/f32 it's a wash, but prefer max when
(V_MAX_F64 $src_mods, $src, $src_mods, $src) // valid.
>; //
} // FIXME: Lowering f32/f16 with max is worse since we can use a
// smaller encoding if the input is fneg'd. It also adds an extra
// register use.
let SubtargetPredicate = HasMinMaxDenormModes in {
defm : SelectCanonicalizeAsMax<[], [], []>;
} // End SubtargetPredicate = HasMinMaxDenormModes
let SubtargetPredicate = NotHasMinMaxDenormModes in {
// Use the max lowering if we don't need to flush.
// FIXME: We don't do use this for f32 as a workaround for the
// library being compiled with the default ieee mode, but
// potentially being called from flushing kernels. Really we should
// not be mixing code expecting different default FP modes, but mul
// works in any FP environment.
defm : SelectCanonicalizeAsMax<[FalsePredicate], [FP64Denormals], [FP16Denormals]>;
} // End SubtargetPredicate = NotHasMinMaxDenormModes
let OtherPredicates = [HasDLInsts] in { let OtherPredicates = [HasDLInsts] in {
def : GCNPat < def : GCNPat <

View File

@ -48,8 +48,8 @@ body: |
; GFX8: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]]
; GFX9-LABEL: name: fcanonicalize_f16_flush ; GFX9-LABEL: name: fcanonicalize_f16_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]] ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0 %1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FCANONICALIZE %1 %2:vgpr(s16) = G_FCANONICALIZE %1
@ -72,8 +72,8 @@ body: |
; GFX8-LABEL: name: fcanonicalize_f32_denorm ; GFX8-LABEL: name: fcanonicalize_f32_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec
; GFX8: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
; GFX9-LABEL: name: fcanonicalize_f32_denorm ; GFX9-LABEL: name: fcanonicalize_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
@ -103,8 +103,8 @@ body: |
; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
; GFX9-LABEL: name: fcanonicalize_f32_flush ; GFX9-LABEL: name: fcanonicalize_f32_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FCANONICALIZE %0 %1:vgpr(s32) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1 S_ENDPGM 0, implicit %1
@ -125,9 +125,9 @@ body: |
liveins: $vgpr0 liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_v2f16_denorm ; GFX8-LABEL: name: fcanonicalize_v2f16_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]] ; GFX8: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX8: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>) ; GFX8: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
; GFX9-LABEL: name: fcanonicalize_v2f16_denorm ; GFX9-LABEL: name: fcanonicalize_v2f16_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
@ -157,8 +157,8 @@ body: |
; GFX8: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] ; GFX8: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]]
; GFX9-LABEL: name: fcanonicalize_v2f16_flush ; GFX9-LABEL: name: fcanonicalize_v2f16_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] ; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1 S_ENDPGM 0, implicit %1
@ -211,8 +211,8 @@ body: |
; GFX8: S_ENDPGM 0, implicit [[V_MUL_F64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F64_]]
; GFX9-LABEL: name: fcanonicalize_f64_flush ; GFX9-LABEL: name: fcanonicalize_f64_flush
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F64_]] ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F64_]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1 %0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FCANONICALIZE %0 %1:vgpr(s64) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1 S_ENDPGM 0, implicit %1
@ -233,8 +233,8 @@ body: |
liveins: $vgpr0 liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX8-LABEL: name: fcanonicalize_fabs_f32_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec
; GFX8: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec
@ -265,8 +265,8 @@ body: |
; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0 %1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FCANONICALIZE %1 %2:vgpr(s32) = G_FCANONICALIZE %1
@ -288,8 +288,8 @@ body: |
liveins: $vgpr0 liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX8-LABEL: name: fcanonicalize_fneg_f32_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec
; GFX8: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec
@ -319,8 +319,8 @@ body: |
; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0 %1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FCANONICALIZE %1 %2:vgpr(s32) = G_FCANONICALIZE %1
@ -344,8 +344,8 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GFX8: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec
; GFX8: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
@ -382,8 +382,8 @@ body: |
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0 %1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FABS %1 %2:vgpr(s32) = G_FABS %1

View File

@ -1,10 +1,10 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}kernel_ieee_mode_default: ; GCN-LABEL: {{^}}kernel_ieee_mode_default:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] ; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32 ; GCN-NOT: v_mul_f32
define amdgpu_kernel void @kernel_ieee_mode_default() #0 { define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
@ -18,8 +18,8 @@ define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
; GCN-LABEL: {{^}}kernel_ieee_mode_on: ; GCN-LABEL: {{^}}kernel_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] ; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32 ; GCN-NOT: v_mul_f32
define amdgpu_kernel void @kernel_ieee_mode_on() #1 { define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
@ -48,8 +48,8 @@ define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
; GCN-LABEL: {{^}}func_ieee_mode_default: ; GCN-LABEL: {{^}}func_ieee_mode_default:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] ; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32 ; GCN-NOT: v_mul_f32
define void @func_ieee_mode_default() #0 { define void @func_ieee_mode_default() #0 {
@ -63,8 +63,8 @@ define void @func_ieee_mode_default() #0 {
; GCN-LABEL: {{^}}func_ieee_mode_on: ; GCN-LABEL: {{^}}func_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] ; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32 ; GCN-NOT: v_mul_f32
define void @func_ieee_mode_on() #1 { define void @func_ieee_mode_on() #1 {
@ -93,8 +93,8 @@ define void @func_ieee_mode_off() #2 {
; GCN-LABEL: {{^}}cs_ieee_mode_default: ; GCN-LABEL: {{^}}cs_ieee_mode_default:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] ; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32 ; GCN-NOT: v_mul_f32
define amdgpu_cs void @cs_ieee_mode_default() #0 { define amdgpu_cs void @cs_ieee_mode_default() #0 {
@ -108,8 +108,8 @@ define amdgpu_cs void @cs_ieee_mode_default() #0 {
; GCN-LABEL: {{^}}cs_ieee_mode_on: ; GCN-LABEL: {{^}}cs_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] ; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32 ; GCN-NOT: v_mul_f32
define amdgpu_cs void @cs_ieee_mode_on() #1 { define amdgpu_cs void @cs_ieee_mode_on() #1 {
@ -153,8 +153,8 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
; GCN-LABEL: {{^}}ps_ieee_mode_on: ; GCN-LABEL: {{^}}ps_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] ; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32 ; GCN-NOT: v_mul_f32
define amdgpu_ps void @ps_ieee_mode_on() #1 { define amdgpu_ps void @ps_ieee_mode_on() #1 {

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,GFX678 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,GFX678 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
; GCN-LABEL: {{^}}v_clamp_f32: ; GCN-LABEL: {{^}}v_clamp_f32:
@ -74,7 +74,8 @@ define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float a
; GCN-LABEL: {{^}}v_clamp_negzero_maybe_snan_f32: ; GCN-LABEL: {{^}}v_clamp_negzero_maybe_snan_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]] ; GFX678: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; GFX9: v_max_f32_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x80000000, [[QUIET]] ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x80000000, [[QUIET]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 { define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
@ -91,7 +92,8 @@ define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %o
; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32: ; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]] ; GFX678: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GFX9: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[A]], [[A]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]] ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_min_f32_e32 [[MED:v[0-9]+]], 1.0, [[QUIET_A]] ; GCN: v_min_f32_e32 [[MED:v[0-9]+]], 1.0, [[QUIET_A]]
; GCN-NOT: [[MAX]] ; GCN-NOT: [[MAX]]
@ -416,7 +418,8 @@ define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out,
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp: ; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]] ; GFX678: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GFX9: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[A]], [[A]]
; GCN: v_med3_f32 {{v[0-9]+}}, [[QUIET_A]], 0, 1.0 ; GCN: v_med3_f32 {{v[0-9]+}}, [[QUIET_A]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 { define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()

View File

@ -5,8 +5,8 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32: ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-DENORM: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) { define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@ -172,8 +172,9 @@ define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrsp
; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32: ; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]], ; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
; GCN-FLUSH: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]] ; VI: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]] ; GFX9: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
; GCN-NOT: v_mul ; GCN-NOT: v_mul
; GCN-NOT: v_max ; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]] ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
@ -305,8 +306,8 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x
} }
; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32: ; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}} ; VI: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} ; GFX9: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) { define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@ -334,8 +335,8 @@ define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace
} }
; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32: ; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}| ; VI: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}| ; GFX9: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) { define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@ -347,8 +348,9 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrsp
} }
; GCN-LABEL: test_no_fold_canonicalize_fcopysign_value_f32: ; GCN-LABEL: test_no_fold_canonicalize_fcopysign_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}| ; VI: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}| ; GFX9: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
; GCN-NOT: v_mul_ ; GCN-NOT: v_mul_
; GCN-NOT: v_max_ ; GCN-NOT: v_max_
define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(float addrspace(1)* %arg, float %sign) { define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(float addrspace(1)* %arg, float %sign) {
@ -454,10 +456,10 @@ define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode: ; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]] ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
; GCN-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]] ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
; GCN-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]] ; GFX9: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, [[QUIET]]
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, [[QUIET]]
; GCN-NOT: v_max ; GCN-NOT: v_max
; GCN-NOT: v_mul ; GCN-NOT: v_mul
@ -512,8 +514,8 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspa
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32: ; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]] ; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]] ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]] ; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) { define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@ -530,10 +532,9 @@ define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace
; GFX9-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]] ; GFX9-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
; GFX9-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[QUIET]] ; GFX9-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[QUIET]]
; GFX9-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]] ; GFX9-FLUSH: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
; GFX9-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]] ; GFX9-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]]
; VI-FLUSH: v_mul_f32_e32 [[QUIET_V0:v[0-9]+]], 1.0, [[VAL]] ; VI-FLUSH: v_mul_f32_e32 [[QUIET_V0:v[0-9]+]], 1.0, [[VAL]]
; VI-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_V0]] ; VI-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_V0]]
@ -647,7 +648,9 @@ entry:
; GFX9-DENORM: global_load_dword [[V:v[0-9]+]], ; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]] ; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-DENORM-NOT: 1.0 ; GFX9-DENORM-NOT: 1.0
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} ; GFX9-DENORM-NOT: v_max
; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-FLUSH: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@ -736,13 +739,9 @@ define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %
; GFX9: v_min_f32_e32 v0, v0, v1 ; GFX9: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 ; GFX9-NEXT: s_setpc_b64
; VI-FLUSH-DAG: v_mul_f32_e32 v0, 1.0, v0 ; VI-DAG: v_mul_f32_e32 v0, 1.0, v0
; VI-FLUSH-DAG: v_mul_f32_e32 v1, 1.0, v1 ; VI-DAG: v_mul_f32_e32 v1, 1.0, v1
; VI-FLUSH: v_min_f32_e32 v0, v0, v1 ; VI: v_min_f32_e32 v0, v0, v1
; VI-DENORM-DAG: v_max_f32_e32 v0, v0, v0
; VI-DENORM-DAG: v_max_f32_e32 v1, v1, v1
; VI-DENORM: v_min_f32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 ; VI-NEXT: s_setpc_b64
define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %arg1) { define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %arg1) {

View File

@ -98,7 +98,8 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %
} }
; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_var_f16: ; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_var_f16:
; GFX89: v_mul_f16_e32 [[REG:v[0-9]+]], -1.0, v{{[0-9]+}} ; VI: v_mul_f16_e32 [[REG:v[0-9]+]], -1.0, v{{[0-9]+}}
; GFX9: v_max_f16_e64 [[REG:v[0-9]+]], -v{{[0-9]+}}, -v{{[0-9]+}}
; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]] ; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 { define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 {
%val = load half, half addrspace(1)* %out %val = load half, half addrspace(1)* %out
@ -109,7 +110,9 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half ad
} }
; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_fabs_var_f16: ; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_fabs_var_f16:
; GFX89: v_mul_f16_e64 [[REG:v[0-9]+]], -1.0, |v{{[0-9]+}}| ; VI: v_mul_f16_e64 [[REG:v[0-9]+]], -1.0, |v{{[0-9]+}}|
; GFX9: v_max_f16_e64 [[REG:v[0-9]+]], -|v{{[0-9]+}}|, -|v{{[0-9]+}}|
; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]] ; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
; CI: v_cvt_f32_f16_e64 {{v[0-9]+}}, -|{{v[0-9]+}}| ; CI: v_cvt_f32_f16_e64 {{v[0-9]+}}, -|{{v[0-9]+}}|

View File

@ -1,4 +1,6 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX678 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX678 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
declare float @llvm.fabs.f32(float) #0 declare float @llvm.fabs.f32(float) #0
declare float @llvm.canonicalize.f32(float) #0 declare float @llvm.canonicalize.f32(float) #0
@ -16,8 +18,9 @@ declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN-LABEL: {{^}}v_test_canonicalize_var_f32: ; GCN-LABEL: {{^}}v_test_canonicalize_var_f32:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX678: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]] ; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out %val = load float, float addrspace(1)* %out
%canonicalized = call float @llvm.canonicalize.f32(float %val) %canonicalized = call float @llvm.canonicalize.f32(float %val)
@ -26,8 +29,9 @@ define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out)
} }
; GCN-LABEL: {{^}}s_test_canonicalize_var_f32: ; GCN-LABEL: {{^}}s_test_canonicalize_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}} ; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
; GCN: buffer_store_dword [[REG]] ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 { define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float %val) %canonicalized = call float @llvm.canonicalize.f32(float %val)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -35,8 +39,9 @@ define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out,
} }
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32: ; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}| ; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]] ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out %val = load float, float addrspace(1)* %out
%val.fabs = call float @llvm.fabs.f32(float %val) %val.fabs = call float @llvm.fabs.f32(float %val)
@ -46,8 +51,9 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)*
} }
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32: ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}| ; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]] ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out %val = load float, float addrspace(1)* %out
%val.fabs = call float @llvm.fabs.f32(float %val) %val.fabs = call float @llvm.fabs.f32(float %val)
@ -58,8 +64,9 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace
} }
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32: ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}} ; GFX678: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]] ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out %val = load float, float addrspace(1)* %out
%val.fneg = fsub float -0.0, %val %val.fneg = fsub float -0.0, %val
@ -70,7 +77,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)*
; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float undef) %canonicalized = call float @llvm.canonicalize.f32(float undef)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -79,7 +86,7 @@ define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)*
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float 0.0) %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -88,7 +95,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %ou
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32:
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}} ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float -0.0) %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -97,7 +104,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %ou
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float 1.0) %canonicalized = call float @llvm.canonicalize.f32(float 1.0)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -106,7 +113,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %ou
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float -1.0) %canonicalized = call float @llvm.canonicalize.f32(float -1.0)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -115,7 +122,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %ou
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float 16.0) %canonicalized = call float @llvm.canonicalize.f32(float 16.0)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -124,7 +131,7 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32: ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -133,7 +140,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(flo
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32: ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 { define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -142,7 +149,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32: ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -151,7 +158,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(flo
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32: ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 { define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -160,7 +167,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000) %canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -169,7 +176,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -178,7 +185,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addr
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -187,7 +194,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addr
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -196,7 +203,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspac
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -205,7 +212,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspac
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -214,7 +221,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspac
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}} ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float)) %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
store float %canonicalized, float addrspace(1)* %out store float %canonicalized, float addrspace(1)* %out
@ -223,7 +230,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspac
; GCN-LABEL: {{^}}v_test_canonicalize_var_f64: ; GCN-LABEL: {{^}}v_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]] ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out %val = load double, double addrspace(1)* %out
%canonicalized = call double @llvm.canonicalize.f64(double %val) %canonicalized = call double @llvm.canonicalize.f64(double %val)
@ -233,7 +240,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out
; GCN-LABEL: {{^}}s_test_canonicalize_var_f64: ; GCN-LABEL: {{^}}s_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]] ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 { define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double %val) %canonicalized = call double @llvm.canonicalize.f64(double %val)
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -242,7 +249,7 @@ define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64: ; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}| ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}|
; GCN: buffer_store_dwordx2 [[REG]] ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out %val = load double, double addrspace(1)* %out
%val.fabs = call double @llvm.fabs.f64(double %val) %val.fabs = call double @llvm.fabs.f64(double %val)
@ -253,7 +260,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)*
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64: ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}| ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}|
; GCN: buffer_store_dwordx2 [[REG]] ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out %val = load double, double addrspace(1)* %out
%val.fabs = call double @llvm.fabs.f64(double %val) %val.fabs = call double @llvm.fabs.f64(double %val)
@ -265,7 +272,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspac
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64: ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}} ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]] ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out %val = load double, double addrspace(1)* %out
%val.fneg = fsub double -0.0, %val %val.fneg = fsub double -0.0, %val
@ -277,7 +284,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)*
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}} ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0.0) %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -287,7 +294,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}} ; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double -0.0) %canonicalized = call double @llvm.canonicalize.f64(double -0.0)
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -297,7 +304,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 1.0) %canonicalized = call double @llvm.canonicalize.f64(double 1.0)
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -307,7 +314,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double -1.0) %canonicalized = call double @llvm.canonicalize.f64(double -1.0)
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -317,7 +324,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 16.0) %canonicalized = call double @llvm.canonicalize.f64(double 16.0)
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -327,7 +334,7 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64: ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}} ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 { define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -337,7 +344,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(dou
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64: ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 { define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -347,7 +354,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64: ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}} ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 { define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -357,7 +364,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(dou
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64: ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 { define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -367,7 +374,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000) %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -377,7 +384,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)*
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -387,7 +394,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double add
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -397,7 +404,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double add
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -407,7 +414,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspa
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -417,7 +424,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspa
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -427,7 +434,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspa
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 { define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
store double %canonicalized, double addrspace(1)* %out store double %canonicalized, double addrspace(1)* %out
@ -435,7 +442,8 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspa
} }
; GCN-LABEL: {{^}}test_canonicalize_value_f64_flush: ; GCN-LABEL: {{^}}test_canonicalize_value_f64_flush:
; GCN: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}] ; GFX678: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}]
; GCN9: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 { define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
@ -447,7 +455,8 @@ define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)
} }
; GCN-LABEL: {{^}}test_canonicalize_value_f32_flush: ; GCN-LABEL: {{^}}test_canonicalize_value_f32_flush:
; GCN: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX9: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 { define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@ -459,7 +468,8 @@ define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)*
} }
; GCN-LABEL: {{^}}test_canonicalize_value_f16_flush: ; GCN-LABEL: {{^}}test_canonicalize_value_f16_flush:
; GCN: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}} ; GFX8: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX9: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 { define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
@ -470,23 +480,13 @@ define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)*
ret void ret void
} }
; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush_gfx8: ; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00 ; GFX8: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
; GCN-DAG: v_mul_f16_sdwa v{{[0-9]+}}, [[ONE]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-DAG: v_mul_f16_sdwa v{{[0-9]+}}, [[ONE]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GCN-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} ; GFX8-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx8(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
%v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
%canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
%gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
ret void
}
; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush_gfx9: ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GCN-DAG: v_pk_mul_f16 v{{[0-9]+}}, 1.0, v{{[0-9]+}} op_sel_hi:[0,1]{{$}} define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx9(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #6 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
%v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4 %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
@ -498,7 +498,7 @@ define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx9(<2 x half> a
; GCN-LABEL: {{^}}test_canonicalize_value_f64_denorm: ; GCN-LABEL: {{^}}test_canonicalize_value_f64_denorm:
; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] ; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #5 { define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #3 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
%v = load double, double addrspace(1)* %gep, align 8 %v = load double, double addrspace(1)* %gep, align 8
@ -509,8 +509,9 @@ define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1
} }
; GCN-LABEL: {{^}}test_canonicalize_value_f32_denorm: ; GCN-LABEL: {{^}}test_canonicalize_value_f32_denorm:
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ; GFX678: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #5 { ; GFX9: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #3 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
%v = load float, float addrspace(1)* %gep, align 4 %v = load float, float addrspace(1)* %gep, align 4
@ -520,9 +521,12 @@ define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)
ret void ret void
} }
; FIXME: Conversion to float should count as the canonicalize pre-gfx8
; GCN-LABEL: {{^}}test_canonicalize_value_f16_denorm: ; GCN-LABEL: {{^}}test_canonicalize_value_f16_denorm:
; GCN: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #5 { ; GFX8: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #3 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
%v = load half, half addrspace(1)* %gep, align 2 %v = load half, half addrspace(1)* %gep, align 2
@ -533,8 +537,14 @@ define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)*
} }
; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_denorm: ; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_denorm:
; GCN: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #5 { ; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX8: v_max_f16_sdwa
; GFX8: v_max_f16_e32
; GFX9: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #3 {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() %id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
%v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4 %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
@ -556,43 +566,64 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f64(<2 x double> addrspace(
ret void ret void
} }
; GCN-LABEL: {{^}}v_test_canonicalize_v2f32: ; GCN-LABEL: {{^}}v_test_canonicalize_v2f32_flush:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
define <2 x float> @v_test_canonicalize_v2f32(<2 x float> %arg) #1 {
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <2 x float> @v_test_canonicalize_v2f32_flush(<2 x float> %arg) #1 {
%canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg) %canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg)
ret <2 x float> %canon ret <2 x float> %canon
} }
; GCN-LABEL: {{^}}v_test_canonicalize_v3f32: ; GCN-LABEL: {{^}}v_test_canonicalize_v3f32_flush:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
define <3 x float> @v_test_canonicalize_v3f32(<3 x float> %arg) #1 {
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <3 x float> @v_test_canonicalize_v3f32_flush(<3 x float> %arg) #1 {
%canon = call <3 x float> @llvm.canonicalize.v3f32(<3 x float> %arg) %canon = call <3 x float> @llvm.canonicalize.v3f32(<3 x float> %arg)
ret <3 x float> %canon ret <3 x float> %canon
} }
; GCN-LABEL: {{^}}v_test_canonicalize_v4f32: ; GCN-LABEL: {{^}}v_test_canonicalize_v4f32_flush:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
define <4 x float> @v_test_canonicalize_v4f32(<4 x float> %arg) #1 {
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <4 x float> @v_test_canonicalize_v4f32_flush(<4 x float> %arg) #1 {
%canon = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %arg) %canon = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %arg)
ret <4 x float> %canon ret <4 x float> %canon
} }
; GCN-LABEL: {{^}}v_test_canonicalize_v8f32: ; GCN-LABEL: {{^}}v_test_canonicalize_v8f32_flush:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} ; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
define <8 x float> @v_test_canonicalize_v8f32(<8 x float> %arg) #1 {
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <8 x float> @v_test_canonicalize_v8f32_flush(<8 x float> %arg) #1 {
%canon = call <8 x float> @llvm.canonicalize.v8f32(<8 x float> %arg) %canon = call <8 x float> @llvm.canonicalize.v8f32(<8 x float> %arg)
ret <8 x float> %canon ret <8 x float> %canon
} }
@ -628,6 +659,4 @@ attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" } attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="tonga" } attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #5 = { nounwind "denormal-fp-math"="ieee,ieee" "target-cpu"="gfx900" }
attributes #6 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="gfx900" }

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,SI %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX678,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
declare double @llvm.minnum.f64(double, double) #0 declare double @llvm.minnum.f64(double, double) #0
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
@ -7,26 +8,44 @@ declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) #0
declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0 declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0 declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
; FUNC-LABEL: {{^}}test_fmin_f64_ieee: ; GCN-LABEL: {{^}}test_fmin_f64_ieee_noflush:
; SI: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]] ; GCN: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]]
; SI: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]] ; GCN: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: v_max_f64 [[QUIETA:v\[[0-9]+:[0-9]+\]]], [[A]], [[A]]
; SI-DAG: v_max_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], [[B]], [[B]] ; GCN-DAG: v_max_f64 [[QUIETA:v\[[0-9]+:[0-9]+\]]], [[A]], [[A]]
; SI: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[QUIETA]], [[QUIETB]] ; GCN-DAG: v_max_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], [[B]], [[B]]
define amdgpu_kernel void @test_fmin_f64_ieee([8 x i32], double %a, [8 x i32], double %b) nounwind {
; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[QUIETA]], [[QUIETB]]
define amdgpu_kernel void @test_fmin_f64_ieee_noflush([8 x i32], double %a, [8 x i32], double %b) #1 {
%val = call double @llvm.minnum.f64(double %a, double %b) #0 %val = call double @llvm.minnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* undef, align 8 store double %val, double addrspace(1)* undef, align 8
ret void ret void
} }
; FUNC-LABEL: {{^}}test_fmin_f64_no_ieee: ; GCN-LABEL: {{^}}test_fmin_f64_ieee_flush:
; SI: ds_read_b64 [[VAL0:v\[[0-9]+:[0-9]+\]]] ; GCN: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]]
; SI: ds_read_b64 [[VAL1:v\[[0-9]+:[0-9]+\]]] ; GCN: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]]
; SI-NOT: [[VAL0]] ; GFX678-DAG: v_mul_f64 [[QUIETA:v\[[0-9]+:[0-9]+\]]], 1.0, [[A]]
; SI-NOT: [[VAL1]] ; GFX678-DAG: v_mul_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], 1.0, [[B]]
; SI: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VAL0]], [[VAL1]]
; SI-NOT: [[RESULT]] ; GFX9-DAG: v_max_f64 [[QUIETA:v\[[0-9]+:[0-9]+\]]], [[A]], [[A]]
; SI: ds_write_b64 v{{[0-9]+}}, [[RESULT]] ; GFX9-DAG: v_max_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], [[B]], [[B]]
; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[QUIETA]], [[QUIETB]]
define amdgpu_kernel void @test_fmin_f64_ieee_flush([8 x i32], double %a, [8 x i32], double %b) #2 {
%val = call double @llvm.minnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* undef, align 8
ret void
}
; GCN-LABEL: {{^}}test_fmin_f64_no_ieee:
; GCN: ds_read_b64 [[VAL0:v\[[0-9]+:[0-9]+\]]]
; GCN: ds_read_b64 [[VAL1:v\[[0-9]+:[0-9]+\]]]
; GCN-NOT: [[VAL0]]
; GCN-NOT: [[VAL1]]
; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VAL0]], [[VAL1]]
; GCN-NOT: [[RESULT]]
; GCN: ds_write_b64 v{{[0-9]+}}, [[RESULT]]
define amdgpu_ps void @test_fmin_f64_no_ieee() nounwind { define amdgpu_ps void @test_fmin_f64_no_ieee() nounwind {
%a = load volatile double, double addrspace(3)* undef %a = load volatile double, double addrspace(3)* undef
%b = load volatile double, double addrspace(3)* undef %b = load volatile double, double addrspace(3)* undef
@ -35,58 +54,58 @@ define amdgpu_ps void @test_fmin_f64_no_ieee() nounwind {
ret void ret void
} }
; FUNC-LABEL: {{^}}test_fmin_v2f64: ; GCN-LABEL: {{^}}test_fmin_v2f64:
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind { define amdgpu_kernel void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0 %val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16 store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
ret void ret void
} }
; FUNC-LABEL: {{^}}test_fmin_v4f64: ; GCN-LABEL: {{^}}test_fmin_v4f64:
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind { define amdgpu_kernel void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0 %val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32 store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
ret void ret void
} }
; FUNC-LABEL: {{^}}test_fmin_v8f64: ; GCN-LABEL: {{^}}test_fmin_v8f64:
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind { define amdgpu_kernel void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0 %val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64 store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
ret void ret void
} }
; FUNC-LABEL: {{^}}test_fmin_v16f64: ; GCN-LABEL: {{^}}test_fmin_v16f64:
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
; SI: v_min_f64 ; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind { define amdgpu_kernel void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0 %val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128 store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
@ -94,3 +113,5 @@ define amdgpu_kernel void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <1
} }
attributes #0 = { nounwind readnone } attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "denormal-fp-math"="ieee,ieee" }
attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }