forked from OSchip/llvm-project
Add support for getting & setting the FPSCR application register on ARM when VFP is enabled.
Add support for using the FPSCR in conjunction with the vcvtr instruction, for controlling fp to int rounding. Add support for the FLT_ROUNDS_ node now that the FPSCR is exposed. llvm-svn: 110152
This commit is contained in:
parent
be633d91d0
commit
b69b182191
|
@ -35,6 +35,20 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
|
|||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VFP
|
||||
|
||||
let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
|
||||
// Returns the FPSCR as an i32; takes no operands. Exposed to front ends as
// __builtin_arm_get_fpscr.
// NOTE(review): this is declared IntrNoMem while the setter below is
// IntrWriteMem -- confirm that a read cannot be CSE'd with, or moved across,
// an intervening llvm.arm.set.fpscr.
def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
// Writes its i32 operand to the FPSCR; produces no result. Exposed to front
// ends as __builtin_arm_set_fpscr.
def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrWriteMem]>;
|
||||
// Matched by the "vcvtr.s32.f64" / "vcvtr.s32.f32" instruction patterns
// (VTOSIRD / VTOSIRS). The operand may be any floating-point type; the result
// is typed float, and the selecting patterns write it to an SPR register.
def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
|
||||
[IntrNoMem]>;
|
||||
// Counterpart of int_arm_vcvtr matched by the "vcvtr.u32.f64" /
// "vcvtr.u32.f32" instruction patterns (VTOUIRD / VTOUIRS).
def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Advanced SIMD (NEON)
|
||||
|
||||
|
|
|
@ -176,6 +176,7 @@ getReservedRegs(const MachineFunction &MF) const {
|
|||
BitVector Reserved(getNumRegs());
|
||||
Reserved.set(ARM::SP);
|
||||
Reserved.set(ARM::PC);
|
||||
Reserved.set(ARM::FPSCR);
|
||||
if (STI.isTargetDarwin() || hasFP(MF))
|
||||
Reserved.set(FramePtr);
|
||||
// Some targets reserve R9.
|
||||
|
|
|
@ -474,10 +474,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
|||
}
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
|
||||
|
||||
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
|
||||
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
|
||||
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
|
||||
// iff target supports vfp2.
|
||||
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
|
||||
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
|
||||
}
|
||||
|
||||
// We want to custom lower some of our intrinsics.
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
@ -2764,6 +2766,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
|
|||
return DAG.getMergeValues(Ops, 2, dl);
|
||||
}
|
||||
|
||||
// Custom lowering for ISD::FLT_ROUNDS_: reads the FPSCR through the
// arm_get_fpscr intrinsic and returns an i32 node holding the FLT_ROUNDS
// value derived from its rounding-mode field.
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
// The rounding mode is in bits 23:22 of the FPSCR.
|
||||
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0,
// i.e. (mode + 1) & 3.
|
||||
// The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3:
// adding 1 << 22 increments the two-bit field in place, and any carry out of
// bit 23 is discarded by the final mask. It is written this way
|
||||
// so that the shift + and get folded into a bitfield extract.
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
|
||||
DAG.getConstant(Intrinsic::arm_get_fpscr,
|
||||
MVT::i32));
|
||||
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
|
||||
DAG.getConstant(1U << 22, MVT::i32));
|
||||
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
|
||||
DAG.getConstant(22, MVT::i32));
|
||||
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
|
||||
DAG.getConstant(3, MVT::i32));
|
||||
}
|
||||
|
||||
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
|
||||
const ARMSubtarget *ST) {
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -3705,6 +3725,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
|
||||
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
|
||||
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
|
||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
|
|
@ -82,7 +82,7 @@ namespace llvm {
|
|||
|
||||
MEMBARRIER, // Memory barrier
|
||||
SYNCBARRIER, // Memory sync barrier
|
||||
|
||||
|
||||
VCEQ, // Vector compare equal.
|
||||
VCGE, // Vector compare greater than or equal.
|
||||
VCGEU, // Vector compare unsigned greater than or equal.
|
||||
|
@ -342,6 +342,7 @@ namespace llvm {
|
|||
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
|
||||
CallingConv::ID CallConv, bool isVarArg,
|
||||
|
|
|
@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
|
|||
|
||||
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
|
||||
// Selected via the int_arm_vcvtr / int_arm_vcvtru intrinsics.
|
||||
|
||||
let Uses = [FPSCR] in {
|
||||
def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
|
||||
(outs SPR:$dst), (ins DPR:$a),
|
||||
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
[(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
|
||||
(outs SPR:$dst), (ins SPR:$a),
|
||||
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
[(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
|
||||
(outs SPR:$dst), (ins DPR:$a),
|
||||
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
[(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
|
||||
(outs SPR:$dst), (ins SPR:$a),
|
||||
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
[(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
}
|
||||
|
||||
// Convert between floating-point and fixed-point
|
||||
// Data type for fixed-point naming convention:
|
||||
|
@ -654,32 +655,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
|
|||
}
|
||||
|
||||
// FPSCR <-> GPR (selected via int_arm_get_fpscr / int_arm_set_fpscr)
|
||||
|
||||
let neverHasSideEffects = 1 in {
|
||||
let Uses = [FPSCR] in {
|
||||
def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
|
||||
"\t$dst, fpscr",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
let hasSideEffects = 1, Uses = [FPSCR] in
|
||||
def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
|
||||
"vmrs", "\t$dst, fpscr",
|
||||
[(set GPR:$dst, (int_arm_get_fpscr))]> {
|
||||
let Inst{27-20} = 0b11101111;
|
||||
let Inst{19-16} = 0b0001;
|
||||
let Inst{11-8} = 0b1010;
|
||||
let Inst{7} = 0;
|
||||
let Inst{4} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
let Defs = [FPSCR] in {
|
||||
def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
|
||||
"\tfpscr, $src",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
let Defs = [FPSCR] in
|
||||
def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
|
||||
"vmsr", "\tfpscr, $src",
|
||||
[(int_arm_set_fpscr GPR:$src)]> {
|
||||
let Inst{27-20} = 0b11101110;
|
||||
let Inst{19-16} = 0b0001;
|
||||
let Inst{11-8} = 0b1010;
|
||||
let Inst{7} = 0;
|
||||
let Inst{4} = 1;
|
||||
}
|
||||
}
|
||||
} // neverHasSideEffects
|
||||
|
||||
// Materialize FP immediates. VFP3 only.
|
||||
let isReMaterializable = 1 in {
|
||||
|
|
Loading…
Reference in New Issue