forked from OSchip/llvm-project
[CodeGen] Mark the promoted FCOPYSIGN result FP_ROUND as TRUNCating.
Now that we can properly promote mismatched FCOPYSIGNs (r244858), we can mark the FP_ROUND on the result as truncating, to expose folding. FCOPYSIGN doesn't change anything but the sign bit, so (fp_round (fcopysign (fpext a), b)) is equivalent, modulo the sign bit, to (fp_round (fpext a)), which is a no-op. llvm-svn: 244862
This commit is contained in:
parent
b2a9ed910e
commit
a196661bb0
|
@ -4301,8 +4301,15 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
|
|||
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
|
||||
Tmp2 = Node->getOperand(1);
|
||||
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
|
||||
|
||||
// fcopysign doesn't change anything but the sign bit, so
|
||||
// (fp_round (fcopysign (fpext a), b))
|
||||
// is as precise as
|
||||
// (fp_round (fpext a))
|
||||
// which is a no-op. Mark it as a TRUNCating FP_ROUND.
|
||||
const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
|
||||
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
|
||||
Tmp3, DAG.getIntPtrConstant(0, dl)));
|
||||
Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
|
||||
break;
|
||||
}
|
||||
case ISD::FFLOOR:
|
||||
|
|
|
@ -702,6 +702,21 @@ define half @test_copysign_f64(half %a, double %b) #0 {
|
|||
ret half %r
|
||||
}
|
||||
|
||||
; Check that the FP promotion will use a truncating FP_ROUND, so we can fold
|
||||
; away the (fpext (fp_round <result>)) here.
|
||||
|
||||
; CHECK-LABEL: test_copysign_extended:
|
||||
; CHECK-NEXT: fcvt s1, h1
|
||||
; CHECK-NEXT: fcvt s0, h0
|
||||
; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
|
||||
; CHECK-NEXT: bit.16b v0, v1, v2
|
||||
; CHECK-NEXT: ret
|
||||
define float @test_copysign_extended(half %a, half %b) #0 {
|
||||
%r = call half @llvm.copysign.f16(half %a, half %b)
|
||||
%xr = fpext half %r to float
|
||||
ret float %xr
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_floor:
|
||||
; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
|
||||
; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]]
|
||||
|
|
Loading…
Reference in New Issue