From e5b5d8fbb3390246de5d2110dfb7fca5f597d65c Mon Sep 17 00:00:00 2001 From: David Goodwin Date: Wed, 5 Aug 2009 21:02:22 +0000 Subject: [PATCH] When using NEON for single-precision FP, the NEON result must be placed in D0-D15 as these are the only D registers with S subregs. Introduce a new regclass to represent D0-D15 and use it in the NEON single-precision FP patterns. llvm-svn: 78244 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 20 +++++++++----- llvm/lib/Target/ARM/ARMInstrNEON.td | 34 ++++++++++++++---------- llvm/lib/Target/ARM/ARMRegisterInfo.td | 8 ++++++ 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3819b430b2b7..9c5f3aab9b8b 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -610,23 +610,29 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (I != MBB.end()) DL = I->getDebugLoc(); if (DestRC != SrcRC) { - // Not yet supported! - return false; + if (((DestRC == ARM::DPRRegisterClass) && (SrcRC == ARM::DPR_VFP2RegisterClass)) || + ((SrcRC == ARM::DPRRegisterClass) && (DestRC == ARM::DPR_VFP2RegisterClass))) { + // Allow copy between DPR and DPR_VFP2. + } else { + return false; + } } - if (DestRC == ARM::GPRRegisterClass) + if (DestRC == ARM::GPRRegisterClass) { AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg).addReg(SrcReg))); - else if (DestRC == ARM::SPRRegisterClass) + } else if (DestRC == ARM::SPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) .addReg(SrcReg)); - else if (DestRC == ARM::DPRRegisterClass) + } else if ((DestRC == ARM::DPRRegisterClass) || + (DestRC == ARM::DPR_VFP2RegisterClass)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) .addReg(SrcReg)); - else if (DestRC == ARM::QPRRegisterClass) + } else if (DestRC == ARM::QPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); - else + } else { return false; + } return true; } diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 5cf81ee6af25..d31ec416ea0f 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -285,9 +285,11 @@ class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, // Basic 2-register operations, scalar single-precision class N2VDInts : NEONFPPat<(f32 (OpNode SPR:$a)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; + (EXTRACT_SUBREG (COPY_TO_REGCLASS + (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0)), + DPR_VFP2), + arm_ssubreg_0)>; // Narrow 2-register intrinsics. class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, @@ -329,11 +331,13 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, // Basic 3-register operations, scalar single-precision class N3VDs : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + (EXTRACT_SUBREG (COPY_TO_REGCLASS + (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + DPR_VFP2), + arm_ssubreg_0)>; // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt op21_20, bits<4> op11_8, bit op4, @@ -375,12 +379,14 @@ class N3VQMulOp op21_20, bits<4> op11_8, bit op4, class N3VDMulOps : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), + (EXTRACT_SUBREG (COPY_TO_REGCLASS + (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$acc, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + DPR_VFP2), arm_ssubreg_0)>; // Neon 3-argument intrinsics, both double- and quad-register. diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td index 77db711fb7b1..651eae54d442 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -305,6 +305,14 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, }]; } +// Subset of DPR that are accessible with VFP2 (and so that also have +// 32-bit SPR subregs). +def DPR_VFP2 : RegisterClass<"ARM", [f64, v2f32], 64, + [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15]> { + let SubRegClassList = [SPR, SPR]; +} + // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,