forked from OSchip/llvm-project
[CodeGen] Mark the promoted FCOPYSIGN result FP_ROUND as TRUNCating.
Now that we can properly promote mismatched FCOPYSIGNs (r244858), we can mark the FP_ROUND on the promoted result as truncating, which exposes further folding. FCOPYSIGN doesn't change anything but the sign bit, so (fp_round (fcopysign (fpext a), b)) is, modulo the sign bit, equivalent to (fp_round (fpext a)), which is a no-op. llvm-svn: 244862
This commit is contained in:
parent
b2a9ed910e
commit
a196661bb0
|
@ -4301,8 +4301,15 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
|
||||||
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
|
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
|
||||||
Tmp2 = Node->getOperand(1);
|
Tmp2 = Node->getOperand(1);
|
||||||
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
|
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
|
||||||
|
|
||||||
|
// fcopysign doesn't change anything but the sign bit, so
|
||||||
|
// (fp_round (fcopysign (fpext a), b))
|
||||||
|
// is as precise as
|
||||||
|
// (fp_round (fpext a))
|
||||||
|
// which is a no-op. Mark it as a TRUNCating FP_ROUND.
|
||||||
|
const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
|
||||||
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
|
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
|
||||||
Tmp3, DAG.getIntPtrConstant(0, dl)));
|
Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ISD::FFLOOR:
|
case ISD::FFLOOR:
|
||||||
|
|
|
@ -702,6 +702,21 @@ define half @test_copysign_f64(half %a, double %b) #0 {
|
||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Check that the FP promotion will use a truncating FP_ROUND, so we can fold
|
||||||
|
; away the (fpext (fp_round <result>)) here.
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_copysign_extended:
|
||||||
|
; CHECK-NEXT: fcvt s1, h1
|
||||||
|
; CHECK-NEXT: fcvt s0, h0
|
||||||
|
; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
|
||||||
|
; CHECK-NEXT: bit.16b v0, v1, v2
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
define float @test_copysign_extended(half %a, half %b) #0 {
|
||||||
|
%r = call half @llvm.copysign.f16(half %a, half %b)
|
||||||
|
%xr = fpext half %r to float
|
||||||
|
ret float %xr
|
||||||
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test_floor:
|
; CHECK-LABEL: test_floor:
|
||||||
; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
|
; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
|
||||||
; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]]
|
; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]]
|
||||||
|
|
Loading…
Reference in New Issue