[Sparc] Custom bitcast between f64 and v2i32

Summary:
Currently, bitcasting a constant from f64 to v2i32 is done by storing the
value to the stack and then loading it again. This is not necessary, but
seems to happen because v2i32 is a legal type for Sparc V8. If it had not
been legal, we would have gotten help from the type legalizer.

This patch tries to do the same work as the legalizer would have done by
bitcasting the floating point constant and splitting the value up into a
vector of two i32 values.

Reviewers: venkatra, jyknight

Reviewed By: jyknight

Subscribers: glaubitz, fedor.sergeev, jrtc27, llvm-commits

Differential Revision: https://reviews.llvm.org/D49219

llvm-svn: 340723
This commit is contained in:
Daniel Cederman 2018-08-27 07:14:53 +00:00
parent fe28217048
commit 92dadc0bca
3 changed files with 64 additions and 15 deletions

View File

@ -852,12 +852,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (VA.getLocVT() == MVT::f64) {
// Move the float value from the float registers into the
// integer registers.
// TODO: The f64 -> v2i32 conversion is super-inefficient for
// constants: it sticks them in the constant pool, then loads
// to a fp register, then stores to temp memory, then loads to
// integer registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg))
Arg = bitcastConstantFPToInt(C, dl, DAG);
else
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
}
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
@ -1801,6 +1799,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}
// Custom combine bitcast between f64 and v2i32
if (!Subtarget->is64Bit())
setTargetDAGCombine(ISD::BITCAST);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setMinFunctionAlignment(2);
@ -3075,6 +3077,40 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
/// Reinterpret the bits of the floating-point constant \p C as a v2i32
/// vector of two i32 constants.
///
/// The bit pattern is split into its low and high 32-bit words. Element 0 of
/// the returned vector holds the high word on big-endian data layouts and the
/// low word on little-endian data layouts.
SDValue SparcTargetLowering::bitcastConstantFPToInt(ConstantFPSDNode *C,
                                                    const SDLoc &DL,
                                                    SelectionDAG &DAG) const {
  const APInt Bits = C->getValueAPF().bitcastToAPInt();

  // Bits [31:0] and bits [63:32] of the constant, respectively.
  SDValue LowWord = DAG.getConstant(Bits.zextOrTrunc(32), DL, MVT::i32);
  SDValue HighWord =
      DAG.getConstant(Bits.lshr(32).zextOrTrunc(32), DL, MVT::i32);

  // Pick the element order that matches the in-memory layout of the value.
  if (DAG.getDataLayout().isLittleEndian())
    return DAG.getBuildVector(MVT::v2i32, DL, {LowWord, HighWord});
  return DAG.getBuildVector(MVT::v2i32, DL, {HighWord, LowWord});
}
/// Target DAG combine for ISD::BITCAST nodes.
///
/// Folds a bitcast of an f64 floating-point constant to v2i32 into a vector
/// of two i32 constants (see bitcastConstantFPToInt).
///
/// \param N   The BITCAST node being combined.
/// \param DCI Combiner state; its DAG is used to create the replacement.
/// \returns The replacement value, or an empty SDValue when the pattern does
///          not match.
SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
  SDValue Src = N->getOperand(0);

  auto *C = dyn_cast<ConstantFPSDNode>(Src);
  if (!C)
    return SDValue();

  // Compare EVTs rather than calling getSimpleValueType(): the latter asserts
  // on extended (non-simple) value types, and this function performs no
  // legality check of its own before inspecting the types.
  if (N->getValueType(0) != MVT::v2i32 || Src.getValueType() != MVT::f64)
    return SDValue();

  return bitcastConstantFPToInt(C, SDLoc(N), DCI.DAG);
}
/// Entry point for target-specific DAG combines.
///
/// Only ISD::BITCAST nodes are handled; every other opcode yields an empty
/// SDValue, meaning no target-specific combine was performed.
SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  if (N->getOpcode() == ISD::BITCAST)
    return PerformBITCASTCombine(N, DCI);

  return SDValue();
}
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {

View File

@ -191,6 +191,13 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// Target combine for ISD::BITCAST: replaces a bitcast of an f64 constant
/// to v2i32 with a vector of two i32 constants; returns an empty SDValue
/// when the pattern does not match.
SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const;
/// Splits the bits of the FP constant \p C into two i32 constants and
/// returns them as a v2i32 build vector, with the element order chosen
/// from the data layout's endianness.
SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL,
SelectionDAG &DAG) const;
/// Dispatches target-specific DAG combines by opcode; ISD::BITCAST is the
/// only opcode handled, all others return an empty SDValue.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.

View File

@ -1,14 +1,14 @@
; RUN: llc < %s -march=sparc | FileCheck %s
; TODO: actually fix the codegen to be optimal. At least we don't
; crash for now, though...
; RUN: llc < %s -march=sparcel | FileCheck %s --check-prefix=CHECK-LE
;; Bitcast should not do a runtime conversion, but rather emit a
;; constant into integer registers directly.
; CHECK-LABEL: bitcast:
; TODO-CHECK: sethi 1049856, %o0
; TODO-CHECK: sethi 0, %o1
; CHECK: sethi 1049856, %o0
; CHECK: mov %g0, %o1
; CHECK-LE: mov %g0, %o0
; CHECK-LE: sethi 1049856, %o1
define <2 x i32> @bitcast() {
%1 = bitcast double 5.0 to <2 x i32>
ret <2 x i32> %1
@ -18,8 +18,10 @@ define <2 x i32> @bitcast() {
;; registers)
; CHECK-LABEL: test_call
; TODO-CHECK: sethi 1049856, %o0
; TODO-CHECK: sethi 0, %o1
; CHECK: sethi 1049856, %o0
; CHECK: mov %g0, %o1
; CHECK-LE: mov %g0, %o0
; CHECK-LE: sethi 1049856, %o1
declare void @a(double)
define void @test_call() {
call void @a(double 5.0)
@ -32,8 +34,12 @@ define void @test_call() {
;; due to an earlier broken workaround for this issue.)
; CHECK-LABEL: test_intrins_call
; TODO-CHECK: sethi 1049856, %o0
; TODO-CHECK: sethi 0, %o1
; CHECK: sethi 1048576, %o0
; CHECK: mov %g0, %o1
; CHECK: mov %o0, %o2
; CHECK: mov %o1, %o3
; CHECK-LE: mov %g0, %o0
; CHECK-LE: sethi 1048576, %o1
declare double @llvm.pow.f64(double, double)
define double @test_intrins_call() {
%1 = call double @llvm.pow.f64(double 2.0, double 2.0)