forked from OSchip/llvm-project
[Sparc] Custom bitcast between f64 and v2i32
Summary: Currently, bitcasting constants from f64 to v2i32 is done by storing the value to the stack and then loading it again. This is not necessary, but seems to happen because v2i32 is a legal type for Sparc V8. If it had not been legal, we would have gotten help from the type legalizer. This patch tries to do the same work as the legalizer would have done by bitcasting the floating-point constant and splitting the value up into a vector of two i32 values. Reviewers: venkatra, jyknight Reviewed By: jyknight Subscribers: glaubitz, fedor.sergeev, jrtc27, llvm-commits Differential Revision: https://reviews.llvm.org/D49219 llvm-svn: 340723
This commit is contained in:
parent
fe28217048
commit
92dadc0bca
|
@ -852,12 +852,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
|
|||
if (VA.getLocVT() == MVT::f64) {
|
||||
// Move the float value from float registers into the
|
||||
// integer registers.
|
||||
|
||||
// TODO: The f64 -> v2i32 conversion is super-inefficient for
|
||||
// constants: it sticks them in the constant pool, then loads
|
||||
// to a fp register, then stores to temp memory, then loads to
|
||||
// integer registers.
|
||||
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
|
||||
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg))
|
||||
Arg = bitcastConstantFPToInt(C, dl, DAG);
|
||||
else
|
||||
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
|
||||
}
|
||||
|
||||
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
|
||||
|
@ -1801,6 +1799,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::FMUL, MVT::f32, Promote);
|
||||
}
|
||||
|
||||
// Custom combine bitcast between f64 and v2i32
|
||||
if (!Subtarget->is64Bit())
|
||||
setTargetDAGCombine(ISD::BITCAST);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
setMinFunctionAlignment(2);
|
||||
|
@ -3075,6 +3077,40 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
}
|
||||
}
|
||||
|
||||
/// Build a v2i32 vector holding the raw bit pattern of a floating-point
/// constant.
///
/// The constant's bits are split into a low 32-bit word and a high 32-bit
/// word, each emitted as an i32 constant. The element order follows the data
/// layout: on little-endian targets element 0 is the low word, otherwise
/// element 0 is the high word.
///
/// \param C   The floating-point constant node whose bits are reinterpreted.
/// \param DL  Location attached to the newly created nodes.
/// \param DAG The DAG in which the constants and the vector are created.
/// \returns A BUILD_VECTOR of type v2i32 made of the two i32 constants.
SDValue SparcTargetLowering::bitcastConstantFPToInt(ConstantFPSDNode *C,
                                                    const SDLoc &DL,
                                                    SelectionDAG &DAG) const {
  const APInt Bits = C->getValueAPF().bitcastToAPInt();

  // Create the low word first, then the high word.
  SDValue LowWord = DAG.getConstant(Bits.zextOrTrunc(32), DL, MVT::i32);
  SDValue HighWord =
      DAG.getConstant(Bits.lshr(32).zextOrTrunc(32), DL, MVT::i32);

  // Pick the element order directly instead of swapping the two words.
  const bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
  SDValue First = IsLittleEndian ? LowWord : HighWord;
  SDValue Second = IsLittleEndian ? HighWord : LowWord;

  return DAG.getBuildVector(MVT::v2i32, DL, {First, Second});
}
|
||||
|
||||
/// Combine for ISD::BITCAST nodes.
///
/// When the bitcast's operand is an f64 floating-point constant and the
/// result type is v2i32, the node is replaced by the vector produced by
/// bitcastConstantFPToInt.
///
/// \param N   The BITCAST node being combined.
/// \param DCI Combiner state; only its SelectionDAG is used here.
/// \returns The replacement value, or an empty SDValue when the node does not
///          match the pattern.
SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
  SDValue Operand = N->getOperand(0);

  // The checks below run in the same order as a single short-circuit
  // condition: constant operand, then result type, then operand type.
  auto *FPConst = dyn_cast<ConstantFPSDNode>(Operand);
  if (!FPConst)
    return SDValue();

  if (N->getSimpleValueType(0) != MVT::v2i32)
    return SDValue();

  if (Operand.getSimpleValueType() != MVT::f64)
    return SDValue();

  return bitcastConstantFPToInt(FPConst, SDLoc(N), DCI.DAG);
}
|
||||
|
||||
/// Entry point for the DAG combines implemented by this target lowering.
///
/// Only ISD::BITCAST nodes are handled; they are forwarded to
/// PerformBITCASTCombine. Any other opcode yields an empty SDValue.
///
/// \param N   The node being combined.
/// \param DCI Combiner state passed through to the specific combine.
/// \returns The replacement value, or an empty SDValue if nothing was done.
SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  if (N->getOpcode() == ISD::BITCAST)
    return PerformBITCASTCombine(N, DCI);

  return SDValue();
}
|
||||
|
||||
MachineBasicBlock *
|
||||
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
|
|
|
@ -191,6 +191,13 @@ namespace llvm {
|
|||
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||
|
||||
bool ShouldShrinkFPConstant(EVT VT) const override {
|
||||
// Do not shrink FP constpool if VT == MVT::f128.
|
||||
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
; RUN: llc < %s -march=sparc | FileCheck %s
|
||||
|
||||
; TODO: actually fix the codegen to be optimal. At least we don't
|
||||
; crash for now, though...
|
||||
; RUN: llc < %s -march=sparcel | FileCheck %s --check-prefix=CHECK-LE
|
||||
|
||||
;; Bitcast should not do a runtime conversion, but rather emit a
|
||||
;; constant into integer registers directly.
|
||||
|
||||
; CHECK-LABEL: bitcast:
|
||||
; TODO-CHECK: sethi 1049856, %o0
|
||||
; TODO-CHECK: sethi 0, %o1
|
||||
; CHECK: sethi 1049856, %o0
|
||||
; CHECK: mov %g0, %o1
|
||||
; CHECK-LE: mov %g0, %o0
|
||||
; CHECK-LE: sethi 1049856, %o1
|
||||
define <2 x i32> @bitcast() {
|
||||
%1 = bitcast double 5.0 to <2 x i32>
|
||||
ret <2 x i32> %1
|
||||
|
@ -18,8 +18,10 @@ define <2 x i32> @bitcast() {
|
|||
;; registers)
|
||||
|
||||
; CHECK-LABEL: test_call
|
||||
; TODO-CHECK: sethi 1049856, %o0
|
||||
; TODO-CHECK: sethi 0, %o1
|
||||
; CHECK: sethi 1049856, %o0
|
||||
; CHECK: mov %g0, %o1
|
||||
; CHECK-LE: mov %g0, %o0
|
||||
; CHECK-LE: sethi 1049856, %o1
|
||||
declare void @a(double)
|
||||
define void @test_call() {
|
||||
call void @a(double 5.0)
|
||||
|
@ -32,8 +34,12 @@ define void @test_call() {
|
|||
;; due to an earlier broken workaround for this issue.)
|
||||
|
||||
; CHECK-LABEL: test_intrins_call
|
||||
; TODO-CHECK: sethi 1049856, %o0
|
||||
; TODO-CHECK: sethi 0, %o1
|
||||
; CHECK: sethi 1048576, %o0
|
||||
; CHECK: mov %g0, %o1
|
||||
; CHECK: mov %o0, %o2
|
||||
; CHECK: mov %o1, %o3
|
||||
; CHECK-LE: mov %g0, %o0
|
||||
; CHECK-LE: sethi 1048576, %o1
|
||||
declare double @llvm.pow.f64(double, double)
|
||||
define double @test_intrins_call() {
|
||||
%1 = call double @llvm.pow.f64(double 2.0, double 2.0)
|
||||
|
|
Loading…
Reference in New Issue