diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index caec11e108f5..f22c5b9ca4dd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -452,6 +452,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f64, Custom); if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); @@ -4271,17 +4272,25 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { - if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + if (!ST->hasVFP3()) return SDValue(); + bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); - assert(Op.getValueType() == MVT::f32 && - "ConstantFP custom lowering should only occur for f32."); // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); - int ImmVal = ARM_AM::getFP32Imm(FPVal); + int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { + if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { + // We have code in place to select a valid ConstantFP already, no need to + // do any mangling. + return Op; + } + + // It's a float and we are trying to use NEON operations where + // possible. Lower it to a splat followed by an extract. SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, @@ -4290,15 +4299,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } - // If that fails, try a VMOV.i32 + // The rest of our options are NEON only, make sure that's allowed before + // proceeding.
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) + return SDValue(); + EVT VMovVT; - unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); - SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, - VMOVModImm); + uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); + + // It wouldn't really be worth bothering for doubles except for one very + // important value, which does happen to match: 0.0. So make sure we don't do + // anything stupid. + if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) + return SDValue(); + + // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). + SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMOVModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, @@ -4306,11 +4331,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, } // Finally, try a VMVN.i32 - NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, - VMVNModImm); + NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMVNModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. 
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, diff --git a/llvm/test/CodeGen/ARM/constantfp.ll b/llvm/test/CodeGen/ARM/constantfp.ll new file mode 100644 index 000000000000..974bdd729efc --- /dev/null +++ b/llvm/test/CodeGen/ARM/constantfp.ll @@ -0,0 +1,68 @@ +; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=swift %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEONFP %s +; RUN: llc -mtriple=armv7 -mattr=-neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEON %s + +define arm_aapcs_vfpcc float @test_vmov_f32() { +; CHECK-LABEL: test_vmov_f32: +; CHECK: vmov.f32 d0, #1.0 + +; CHECK-NONEONFP: vmov.f32 s0, #1.0 + ret float 1.0 +} + +define arm_aapcs_vfpcc float @test_vmov_imm() { +; CHECK-LABEL: test_vmov_imm: +; CHECK: vmov.i32 d0, #0 + +; CHECK-NONEON-LABEL: test_vmov_imm: +; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} + ret float 0.0 +} + +define arm_aapcs_vfpcc float @test_vmvn_imm() { +; CHECK-LABEL: test_vmvn_imm: +; CHECK: vmvn.i32 d0, #0xb0000000 + +; CHECK-NONEON-LABEL: test_vmvn_imm: +; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} + ret float 8589934080.0 +} + +define arm_aapcs_vfpcc double @test_vmov_f64() { +; CHECK-LABEL: test_vmov_f64: +; CHECK: vmov.f64 d0, #1.0 + +; CHECK-NONEON-LABEL: test_vmov_f64: +; CHECK-NONEON: vmov.f64 d0, #1.0 + + ret double 1.0 +} + +define arm_aapcs_vfpcc double @test_vmov_double_imm() { +; CHECK-LABEL: test_vmov_double_imm: +; CHECK: vmov.i32 d0, #0 + +; CHECK-NONEON-LABEL: test_vmov_double_imm: +; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + ret double 0.0 +} + +define arm_aapcs_vfpcc double @test_vmvn_double_imm() { +; CHECK-LABEL: test_vmvn_double_imm: +; CHECK: vmvn.i32 d0, #0xb0000000 + +; CHECK-NONEON-LABEL: test_vmvn_double_imm: +; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + ret double 0x4fffffff4fffffff +} + +; Make
sure we don't ignore the high half of 64-bit values when deciding whether +; a vmov/vmvn is possible. +define arm_aapcs_vfpcc double @test_notvmvn_double_imm() { +; CHECK-LABEL: test_notvmvn_double_imm: +; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + +; CHECK-NONEON-LABEL: test_notvmvn_double_imm: +; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + ret double 0x4fffffffffffffff +} diff --git a/llvm/test/CodeGen/ARM/reg_sequence.ll b/llvm/test/CodeGen/ARM/reg_sequence.ll index 3fe2bb8e3828..25484f484853 100644 --- a/llvm/test/CodeGen/ARM/reg_sequence.ll +++ b/llvm/test/CodeGen/ARM/reg_sequence.ll @@ -239,10 +239,9 @@ bb14: ; preds = %bb6 ; PR7157 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK-LABEL: t9: -; CHECK: vldr -; CHECK-NOT: vmov d{{.*}}, d16 -; CHECK: vmov.i32 d17 +; CHECK: vmov.i32 d16, #0x0 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: vorr d17, d16, d16 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1]