forked from OSchip/llvm-project
ARM: implement some simple f64 materializations.
Previously we used a constant-pool load for virtually all 64-bit floating-point values. In fact, quite a few common values (including 0.0 and 1.0) can be materialized with "vmov" instructions of one kind or another. llvm-svn: 188773
This commit is contained in:
parent
dc985ef0af
commit
f79c3a5aef
|
@ -452,6 +452,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||||
}
|
}
|
||||||
|
|
||||||
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
|
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
|
||||||
|
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
|
||||||
|
|
||||||
if (Subtarget->hasNEON()) {
|
if (Subtarget->hasNEON()) {
|
||||||
addDRTypeForNEON(MVT::v2f32);
|
addDRTypeForNEON(MVT::v2f32);
|
||||||
|
@ -4271,17 +4272,25 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
|
||||||
|
|
||||||
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
|
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
|
||||||
const ARMSubtarget *ST) const {
|
const ARMSubtarget *ST) const {
|
||||||
if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
|
if (!ST->hasVFP3())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
bool IsDouble = Op.getValueType() == MVT::f64;
|
||||||
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
|
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
|
||||||
assert(Op.getValueType() == MVT::f32 &&
|
|
||||||
"ConstantFP custom lowering should only occur for f32.");
|
|
||||||
|
|
||||||
// Try splatting with a VMOV.f32...
|
// Try splatting with a VMOV.f32...
|
||||||
APFloat FPVal = CFP->getValueAPF();
|
APFloat FPVal = CFP->getValueAPF();
|
||||||
int ImmVal = ARM_AM::getFP32Imm(FPVal);
|
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
|
||||||
|
|
||||||
if (ImmVal != -1) {
|
if (ImmVal != -1) {
|
||||||
|
if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
|
||||||
|
// We have code in place to select a valid ConstantFP already, no need to
|
||||||
|
// do any mangling.
|
||||||
|
return Op;
|
||||||
|
}
|
||||||
|
|
||||||
|
// It's a float and we are trying to use NEON operations where
|
||||||
|
// possible. Lower it to a splat followed by an extract.
|
||||||
SDLoc DL(Op);
|
SDLoc DL(Op);
|
||||||
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
|
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
|
||||||
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
|
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
|
||||||
|
@ -4290,15 +4299,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
|
||||||
DAG.getConstant(0, MVT::i32));
|
DAG.getConstant(0, MVT::i32));
|
||||||
}
|
}
|
||||||
|
|
||||||
// If that fails, try a VMOV.i32
|
// The rest of our options are NEON only, make sure that's allowed before
|
||||||
|
// proceeding.
|
||||||
|
if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
EVT VMovVT;
|
EVT VMovVT;
|
||||||
unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
|
uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
|
||||||
SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
|
|
||||||
VMOVModImm);
|
// It wouldn't really be worth bothering for doubles except for one very
|
||||||
|
// important value, which does happen to match: 0.0. So make sure we don't do
|
||||||
|
// anything stupid.
|
||||||
|
if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
|
||||||
|
SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
|
||||||
|
false, VMOVModImm);
|
||||||
if (NewVal != SDValue()) {
|
if (NewVal != SDValue()) {
|
||||||
SDLoc DL(Op);
|
SDLoc DL(Op);
|
||||||
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
|
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
|
||||||
NewVal);
|
NewVal);
|
||||||
|
if (IsDouble)
|
||||||
|
return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
|
||||||
|
|
||||||
|
// It's a float: cast and extract a vector element.
|
||||||
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
|
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
|
||||||
VecConstant);
|
VecConstant);
|
||||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
|
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
|
||||||
|
@ -4306,11 +4331,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finally, try a VMVN.i32
|
// Finally, try a VMVN.i32
|
||||||
NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
|
NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
|
||||||
VMVNModImm);
|
false, VMVNModImm);
|
||||||
if (NewVal != SDValue()) {
|
if (NewVal != SDValue()) {
|
||||||
SDLoc DL(Op);
|
SDLoc DL(Op);
|
||||||
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
|
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
|
||||||
|
|
||||||
|
if (IsDouble)
|
||||||
|
return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
|
||||||
|
|
||||||
|
// It's a float: cast and extract a vector element.
|
||||||
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
|
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
|
||||||
VecConstant);
|
VecConstant);
|
||||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
|
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=swift %s -o - | FileCheck %s
|
||||||
|
; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEONFP %s
|
||||||
|
; RUN: llc -mtriple=armv7 -mattr=-neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEON %s
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc float @test_vmov_f32() {
|
||||||
|
; CHECK-LABEL: test_vmov_f32:
|
||||||
|
; CHECK: vmov.f32 d0, #1.0
|
||||||
|
|
||||||
|
; CHECK-NONEONFP: vmov.f32 s0, #1.0
|
||||||
|
ret float 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc float @test_vmov_imm() {
|
||||||
|
; CHECK-LABEL: test_vmov_imm:
|
||||||
|
; CHECK: vmov.i32 d0, #0
|
||||||
|
|
||||||
|
; CHECK-NONEON-LABEL: test_vmov_imm:
|
||||||
|
; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
|
||||||
|
ret float 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc float @test_vmvn_imm() {
|
||||||
|
; CHECK-LABEL: test_vmvn_imm:
|
||||||
|
; CHECK: vmvn.i32 d0, #0xb0000000
|
||||||
|
|
||||||
|
; CHECK-NONEON-LABEL: test_vmvn_imm:
|
||||||
|
; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
|
||||||
|
ret float 8589934080.0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc double @test_vmov_f64() {
|
||||||
|
; CHECK-LABEL: test_vmov_f64:
|
||||||
|
; CHECK: vmov.f64 d0, #1.0
|
||||||
|
|
||||||
|
; CHECK-NONEON-LABEL: test_vmov_f64:
|
||||||
|
; CHECK-NONEON: vmov.f64 d0, #1.0
|
||||||
|
|
||||||
|
ret double 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc double @test_vmov_double_imm() {
|
||||||
|
; CHECK-LABEL: test_vmov_double_imm:
|
||||||
|
; CHECK: vmov.i32 d0, #0
|
||||||
|
|
||||||
|
; CHECK-NONEON-LABEL: test_vmov_double_imm:
|
||||||
|
; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
|
||||||
|
ret double 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc double @test_vmvn_double_imm() {
|
||||||
|
; CHECK-LABEL: test_vmvn_double_imm:
|
||||||
|
; CHECK: vmvn.i32 d0, #0xb0000000
|
||||||
|
|
||||||
|
; CHECK-NONEON-LABEL: test_vmvn_double_imm:
|
||||||
|
; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
|
||||||
|
ret double 0x4fffffff4fffffff
|
||||||
|
}
|
||||||
|
|
||||||
|
; Make sure we don't ignore the high half of 64-bit values when deciding whether
|
||||||
|
; a vmov/vmvn is possible.
|
||||||
|
define arm_aapcs_vfpcc double @test_notvmvn_double_imm() {
|
||||||
|
; CHECK-LABEL: test_notvmvn_double_imm:
|
||||||
|
; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
|
||||||
|
|
||||||
|
; CHECK-NONEON-LABEL: test_notvmvn_double_imm:
|
||||||
|
; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
|
||||||
|
ret double 0x4fffffffffffffff
|
||||||
|
}
|
|
@ -239,10 +239,9 @@ bb14: ; preds = %bb6
|
||||||
; PR7157
|
; PR7157
|
||||||
define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
|
define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
|
||||||
; CHECK-LABEL: t9:
|
; CHECK-LABEL: t9:
|
||||||
; CHECK: vldr
|
; CHECK: vmov.i32 d16, #0x0
|
||||||
; CHECK-NOT: vmov d{{.*}}, d16
|
|
||||||
; CHECK: vmov.i32 d17
|
|
||||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
|
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
|
||||||
|
; CHECK-NEXT: vorr d17, d16, d16
|
||||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
|
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
|
||||||
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
|
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
|
||||||
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
|
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
|
||||||
|
|
Loading…
Reference in New Issue