[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.

(In the soft-float ABI, vector types are passed in integer registers, and
combining all those 32-bit integers into a q-reg requires support for
selection DAG nodes like insert_vector_elt and build_vector which aren't
implemented yet for MVE. In fact I've also had to add `arm_aapcs_vfpcc` to
a couple of existing tests to avoid that problem.)

Specifically, this commit adds support for:

 * spills, reloads and register moves for MVE vector registers
 * ditto for the VPT predication mask that lives in VPR.P0
 * make all the MVE vector types legal in ISel, and provide selection DAG
   patterns for BITCAST, LOAD and STORE
 * make loads and stores of scalar FP types conditional on `hasFPRegs()`
   rather than `hasVFP2Base()`. As a result a few existing tests needed
   their llc command lines updating to use `-mattr=-fpregs` as their
   method of turning off all hardware FP support.

Reviewers: dmgreen, samparker, SjoerdMeijer

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60708

llvm-svn: 364329
commit a4b415a683 (parent d0f96be2c7)
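As a rough illustration of what this enables end to end (this sketch is not part of the commit): source like the following, written with Clang's GNU vector extension rather than any MVE header, now compiles for -mtriple=thumbv8.1m.main -mattr=+mve under the hard-float ABI. The type and function names here are made up.

typedef int int32x4 __attribute__((vector_size(16)));

int32x4 vector_add(int32x4 lhs, int32x4 rhs) {
  int32x4 result;
  // The 't' constraint places each 128-bit operand in a floating-point /
  // vector register, i.e. one of q0-q7 for MVE, as exercised by the new
  // mve-basic test near the bottom of this diff.
  __asm__("vadd.i32 %0, %1, %2" : "=t"(result) : "t"(lhs), "t"(rhs));
  return result;
}
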
@@ -805,6 +805,28 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
       .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
 }
 
+void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
+  MIB.addImm(ARMVCC::None);
+  MIB.addReg(0);
+}
+
+void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
+                                      unsigned DestReg) {
+  addUnpredicatedMveVpredNOp(MIB);
+  MIB.addReg(DestReg, RegState::Undef);
+}
+
+void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
+  MIB.addImm(Cond);
+  MIB.addReg(ARM::VPR, RegState::Implicit);
+}
+
+void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
+                                    unsigned Cond, unsigned Inactive) {
+  addPredicatedMveVpredNOp(MIB, Cond);
+  MIB.addReg(Inactive);
+}
+
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DestReg,

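These four helpers centralise the trailing predicate operands that every MVE instruction carries: the vpred_n form appends a condition (ARMVCC::None when unpredicated) plus a VPR mask-register operand, and the vpred_r form additionally supplies the register whose lanes are kept where the predicate is false. A minimal sketch of the intended usage, mirroring the copyPhysReg change below (the wrapper name emitMveVectorMove is hypothetical, not from the patch):

static MachineInstr *emitMveVectorMove(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I,
                                       const DebugLoc &DL,
                                       const ARMBaseInstrInfo &TII,
                                       unsigned DestReg, unsigned SrcReg) {
  // MVE_VORR Dest, Src, Src acts as a plain q-register move; since it is a
  // vpred_r instruction, addUnpredicatedMveVpredROp appends ARMVCC::None,
  // an empty mask-register operand, and an undef "inactive lanes" input.
  MachineInstrBuilder MIB =
      BuildMI(MBB, I, DL, TII.get(ARM::MVE_VORR), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg);
  addUnpredicatedMveVpredROp(MIB, DestReg);
  return MIB.getInstr();
}
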
@@ -833,14 +855,17 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
     Opc = ARM::VMOVD;
   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
 
   if (Opc) {
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
     MIB.addReg(SrcReg, getKillRegState(KillSrc));
-    if (Opc == ARM::VORRq)
+    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
       MIB.addReg(SrcReg, getKillRegState(KillSrc));
-    MIB.add(predOps(ARMCC::AL));
+    if (Opc == ARM::MVE_VORR)
+      addUnpredicatedMveVpredROp(MIB, DestReg);
+    else
+      MIB.add(predOps(ARMCC::AL));
     return;
   }

@@ -851,11 +876,11 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   // Use VORRq when possible.
   if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
     BeginIdx = ARM::qsub_0;
     SubRegs = 2;
   } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
     BeginIdx = ARM::qsub_0;
     SubRegs = 4;
   // Fall back to VMOVD.

@@ -901,6 +926,30 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   } else if (DestReg == ARM::CPSR) {
     copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
     return;
+  } else if (DestReg == ARM::VPR) {
+    assert(ARM::GPRPairRegClass.contains(SrcReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (SrcReg == ARM::VPR) {
+    assert(ARM::GPRPairRegClass.contains(DestReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (DestReg == ARM::FPSCR_NZCV) {
+    assert(ARM::GPRPairRegClass.contains(SrcReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (SrcReg == ARM::FPSCR_NZCV) {
+    assert(ARM::GPRPairRegClass.contains(DestReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
   }
 
   assert(Opc && "Impossible reg-to-reg copy");

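These new copyPhysReg cases move the VPT predicate mask (and the MVE FPSCR_NZCVQC view of the flags) through a general-purpose register, since there is no direct VPR-to-VPR move instruction. Condensed to its two directions, the emission looks like this sketch (GPReg, MBB, I, DL and TII are assumed to come from the surrounding context):

// GPR -> VPR.P0: materialise a predicate mask computed in an integer reg.
BuildMI(MBB, I, DL, TII.get(ARM::VMSR_P0), ARM::VPR)
    .addReg(GPReg, getKillRegState(true))
    .add(predOps(ARMCC::AL));

// VPR.P0 -> GPR: read the current predicate mask back out.
BuildMI(MBB, I, DL, TII.get(ARM::VMRS_P0), GPReg)
    .addReg(ARM::VPR, getKillRegState(true))
    .add(predOps(ARMCC::AL));
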
@@ -925,10 +974,15 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     DstRegs.insert(Dst);
 #endif
     Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
-    // VORR takes two source operands.
-    if (Opc == ARM::VORRq)
+    // VORR (NEON or MVE) takes two source operands.
+    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
       Mov.addReg(Src);
-    Mov = Mov.add(predOps(ARMCC::AL));
+    }
+    // MVE VORR takes predicate operands in place of an ordinary condition.
+    if (Opc == ARM::MVE_VORR)
+      addUnpredicatedMveVpredROp(Mov, Dst);
+    else
+      Mov = Mov.add(predOps(ARMCC::AL));
     // MOVr can set CC.
     if (Opc == ARM::MOVr)
       Mov = Mov.add(condCodeOp());

@@ -1010,6 +1064,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
           .addImm(0)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
+    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
+          .addReg(SrcReg, getKillRegState(isKill))
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO)
+          .add(predOps(ARMCC::AL));
     } else
       llvm_unreachable("Unknown reg class!");
     break;

@@ -1042,7 +1103,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       llvm_unreachable("Unknown reg class!");
     break;
   case 16:
-    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
       // Use aligned spills if the stack can be realigned.
       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
         BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))

@@ -1058,6 +1119,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
       }
+    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+               Subtarget.hasMVEIntegerOps()) {
+      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
+      MIB.addReg(SrcReg, getKillRegState(isKill))
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO);
+      addUnpredicatedMveVpredNOp(MIB);
     } else
       llvm_unreachable("Unknown reg class!");
     break;

@@ -1155,6 +1224,13 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
       return MI.getOperand(0).getReg();
     }
     break;
+  case ARM::VSTR_P0_off:
+    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+        MI.getOperand(1).getImm() == 0) {
+      FrameIndex = MI.getOperand(0).getIndex();
+      return ARM::P0;
+    }
+    break;
   case ARM::VST1q64:
   case ARM::VST1d64TPseudo:
   case ARM::VST1d64QPseudo:

@@ -1225,6 +1301,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
           .addImm(0)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
+    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO)
+          .add(predOps(ARMCC::AL));
     } else
       llvm_unreachable("Unknown reg class!");
     break;

@@ -1261,7 +1343,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       llvm_unreachable("Unknown reg class!");
     break;
   case 16:
-    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
         BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
           .addFrameIndex(FI)

@@ -1274,6 +1356,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
       }
+    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+               Subtarget.hasMVEIntegerOps()) {
+      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
+      MIB.addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO);
+      addUnpredicatedMveVpredNOp(MIB);
     } else
       llvm_unreachable("Unknown reg class!");
     break;

@@ -1370,6 +1459,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
       return MI.getOperand(0).getReg();
     }
     break;
+  case ARM::VLDR_P0_off:
+    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+        MI.getOperand(1).getImm() == 0) {
+      FrameIndex = MI.getOperand(0).getIndex();
+      return ARM::P0;
+    }
+    break;
   case ARM::VLD1q64:
   case ARM::VLD1d8TPseudo:
   case ARM::VLD1d16TPseudo:

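With VSTR_P0_off and VLDR_P0_off recognised by isStoreToStackSlot and isLoadFromStackSlot, generic machine passes can now reason about spills of the predicate mask like any other register. A sketch of a typical consumer (illustrative only; MI and TII are assumed to come from whatever pass is running):

static bool isPredicateMaskReload(const ARMBaseInstrInfo &TII,
                                  const MachineInstr &MI) {
  int FrameIndex = 0;
  // Returns ARM::P0 for the VLDR_P0_off form matched above, or 0 if MI is
  // not a stack reload at all.
  return TII.isLoadFromStackSlot(MI, FrameIndex) == ARM::P0;
}
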
@@ -591,6 +591,13 @@ bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From,
 MachineInstr *findCMPToFoldIntoCBZ(MachineInstr *Br,
                                    const TargetRegisterInfo *TRI);
 
+void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB);
+void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned DestReg);
+
+void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
+void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
+                              unsigned Inactive);
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H

@@ -146,6 +146,9 @@ public:
                                   SDValue &OffImm);
   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                   SDValue &OffImm);
+  template<unsigned Shift>
+  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
+                            SDValue &OffImm);
   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                              SDValue &OffReg, SDValue &ShImm);
   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

@@ -1268,6 +1271,35 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
   return false;
 }
 
+template<unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
+                                           SDValue &Base, SDValue &OffImm) {
+  if (N.getOpcode() == ISD::SUB ||
+      CurDAG->isBaseWithConstantOffset(N)) {
+    if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (N.getOpcode() == ISD::SUB)
+        RHSC = -RHSC;
+
+      if (isShiftedInt<7, Shift>(RHSC)) {
+        Base = N.getOperand(0);
+        if (Base.getOpcode() == ISD::FrameIndex) {
+          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+          Base = CurDAG->getTargetFrameIndex(
+              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+        }
+        OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+        return true;
+      }
+    }
+  }
+
+  // Base only.
+  Base = N;
+  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                             SDValue &Base,
                                             SDValue &OffReg, SDValue &ShImm) {

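SelectT2AddrModeImm7 folds a base-plus-constant address into the MVE load/store immediate form only when the byte offset, scaled down by the access size, fits a signed 7-bit field; that is exactly what isShiftedInt<7, Shift> checks. A standalone restatement of the rule (plain C++, independent of LLVM, with illustrative offsets):

#include <cstdint>

// Mirrors llvm::isShiftedInt<7, Shift>(Offset): Offset must be a multiple
// of 2^Shift, and Offset >> Shift must lie in [-64, 63].
template <unsigned Shift> constexpr bool fitsShiftedImm7(int64_t Offset) {
  return Offset % (INT64_C(1) << Shift) == 0 &&
         (Offset >> Shift) >= -64 && (Offset >> Shift) <= 63;
}

static_assert(fitsShiftedImm7<0>(63), "byte access: +63 is in range");
static_assert(!fitsShiftedImm7<0>(64), "byte access: +64 is not");
static_assert(fitsShiftedImm7<1>(-128), "halfword access: -64 * 2");
static_assert(!fitsShiftedImm7<1>(63), "halfword offsets must be even");
static_assert(fitsShiftedImm7<2>(252), "word access: 63 * 4");
static_assert(!fitsShiftedImm7<2>(256), "word access: 64 * 4 overflows");
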
@@ -221,6 +221,26 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 }
 
+void ARMTargetLowering::addMVEVectorTypes() {
+  // We 'support' these types up to bitcast/load/store level, regardless of
+  // MVE integer-only / float support. Only doing FP data processing on the FP
+  // vector types is inhibited at integer-only level.
+
+  const MVT VecTypes[] = {
+    MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8,
+    MVT::v2f64, MVT::v4f32, MVT::v8f16,
+  };
+
+  for (auto VT : VecTypes) {
+    addRegisterClass(VT, &ARM::QPRRegClass);
+    for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+      setOperationAction(Opc, VT, Expand);
+    setOperationAction(ISD::BITCAST, VT, Legal);
+    setOperationAction(ISD::LOAD, VT, Legal);
+    setOperationAction(ISD::STORE, VT, Legal);
+  }
+}
+
 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
                                      const ARMSubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {

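addMVEVectorTypes deliberately starts from "nothing works" and whitelists only what this commit implements, so any generic DAG node that reaches legalization on an MVE type gets expanded rather than miscompiled. Per vector type, the net effect on the action table is equivalent to this hand-written fragment (shown for v4i32 only; same constructor context assumed):

// Every generic opcode defaults to Expand for the MVE vector types...
setOperationAction(ISD::ADD, MVT::v4i32, Expand);  // representative of all
// ...and only bitcast, load and store are then re-marked Legal:
setOperationAction(ISD::BITCAST, MVT::v4i32, Legal);
setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
setOperationAction(ISD::STORE, MVT::v4i32, Legal);
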
@@ -510,7 +530,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   else
     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
+  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
       !Subtarget->isThumb1Only()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);

@@ -548,6 +568,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
   setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
 
+  if (Subtarget->hasMVEIntegerOps())
+    addMVEVectorTypes();
+
   if (Subtarget->hasNEON()) {
     addDRTypeForNEON(MVT::v2f32);
     addDRTypeForNEON(MVT::v8i8);

@@ -566,11 +589,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       addQRTypeForNEON(MVT::v8f16);
       addDRTypeForNEON(MVT::v4f16);
     }
+  }
 
-  if (Subtarget->hasNEON()) {
+  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
-    // neither Neon nor VFP support any arithmetic operations on it.
-    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
-    // supported for v4f32.
+    // none of Neon, MVE or VFP supports any arithmetic operations on it.
     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);

@@ -604,7 +627,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
+  }
 
+  if (Subtarget->hasNEON()) {
+    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
+    // supported for v4f32.
     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);

@@ -1040,7 +1067,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
+  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
       !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.

@@ -813,6 +813,7 @@ class VectorType;
                                        MachineBasicBlock *MBB) const;
     MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;
+    void addMVEVectorTypes();
   };
 
   enum NEONModImmType {

@@ -3998,3 +3998,126 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
   let Unpredictable{21-20} = 0b11;
   let Unpredictable{11-1} = 0b11111111111;
 }
+
+
+//===----------------------------------------------------------------------===//
+// Patterns
+//===----------------------------------------------------------------------===//
+
+class MVE_unpred_vector_store_typed<ValueType Ty, Instruction RegImmInst,
+                                    PatFrag StoreKind, int shift>
+  : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
+        (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+
+multiclass MVE_unpred_vector_store<Instruction RegImmInst, PatFrag StoreKind,
+                                   int shift> {
+  def : MVE_unpred_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+}
+
+class MVE_unpred_vector_load_typed<ValueType Ty, Instruction RegImmInst,
+                                   PatFrag LoadKind, int shift>
+  : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
+        (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+multiclass MVE_unpred_vector_load<Instruction RegImmInst, PatFrag LoadKind,
+                                  int shift> {
+  def : MVE_unpred_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
+}
+
+let Predicates = [HasMVEInt, IsLE] in {
+  defm : MVE_unpred_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
+  defm : MVE_unpred_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
+  defm : MVE_unpred_vector_store<MVE_VSTRWU32, alignedstore32, 2>;
+
+  defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
+  defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
+  defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>;
+
+  def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
+            (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+  def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
+            (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+  def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
+            (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+}
+
+let Predicates = [HasMVEInt, IsBE] in {
+  def : MVE_unpred_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>;
+  def : MVE_unpred_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
+  def : MVE_unpred_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
+  def : MVE_unpred_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
+  def : MVE_unpred_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;
+
+  def : MVE_unpred_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>;
+  def : MVE_unpred_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
+  def : MVE_unpred_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
+  def : MVE_unpred_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
+  def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
+}
+
+// Bit convert patterns
+
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+
+  def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+
+  def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
+}
+
+let Predicates = [IsLE,HasMVEInt] in {
+  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+
+  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+
+  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+
+  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+
+  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
+
+  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+
+  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+}

@@ -468,8 +468,11 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                           128, (interleave QPR, TuplesOE2D)> {
   // Allocate starting at non-VFP2 registers D16-D31 first.
   // Prefer even-odd pairs as they are easier to copy.
-  let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
-  let AltOrderSelect = [{ return 1; }];
+  let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16)),
+                   (add (trunc QPR, 8), (trunc DPair, 16))];
+  let AltOrderSelect = [{
+    return 1 + MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps();
+  }];
 }
 
 // Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP.

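The register-allocation order now depends on the subtarget: NEON prefers to start allocating at the D16-D31 range that plain VFP2 cannot encode, but MVE only has Q0-Q7, so a second, truncated alternative order is added and AltOrderSelect picks between them. The inline C++ fragment behaves like this sketch (the function name is illustrative; index 0 would be the default untruncated order):

static unsigned pickDPairAllocationOrder(const MachineFunction &MF) {
  // 1 selects the rotated order (start at the non-VFP2 registers);
  // 2 selects the truncated order (only the first 8 Q registers), which
  // is what MVE needs since q8-q15 do not exist there.
  return 1 + MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps();
}
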
@@ -4,9 +4,9 @@
 ; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-MACHO
 ; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-ELF
 ; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=THUMB-LONG
-; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2d16sp | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2d16sp | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2d16sp | FileCheck %s --check-prefix=THUMB-NOVFP
+; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-fpregs | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-fpregs | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-fpregs | FileCheck %s --check-prefix=THUMB-NOVFP
 
 ; Note that some of these tests assume that relocations are either
 ; movw/movt or constant pool loads. Different platforms will select

@@ -1,6 +1,6 @@
 ; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FP16 --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
 ; RUN: llc -asm-verbose=false < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
-; RUN: llc -asm-verbose=false < %s -mattr=-vfp2d16sp | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s -mattr=-fpregs | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
 target triple = "armv7---eabihf"

@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,-vfp2d16sp | FileCheck --check-prefix=NONEON-NOVFP %s
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,-fpregs | FileCheck --check-prefix=NONEON-NOVFP %s
 ; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon | FileCheck --check-prefix=NONEON %s
-; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-vfp2d16sp | FileCheck --check-prefix=NOVFP %s
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-fpregs | FileCheck --check-prefix=NOVFP %s
 ; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,+vfp2 | FileCheck --check-prefix=NONEON-VFP %s
 
 ; Check no NEON instructions are selected when feature is disabled.

@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @vector_add_by_value(<4 x i32> %lhs, <4 x i32>%rhs) {
+; CHECK-LABEL: vector_add_by_value:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    bx lr
+  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
+  ret <4 x i32> %result
+}
+
+define void @vector_add_by_reference(<4 x i32>* %resultp, <4 x i32>* %lhsp, <4 x i32>* %rhsp) {
+; CHECK-LABEL: vector_add_by_reference:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    vstrw.32 q0, [r0]
+; CHECK-NEXT:    bx lr
+  %lhs = load <4 x i32>, <4 x i32>* %lhsp, align 16
+  %rhs = load <4 x i32>, <4 x i32>* %rhsp, align 16
+  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %result, <4 x i32>* %resultp, align 16
+  ret void
+}

@@ -0,0 +1,449 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_i64(<2 x i64> %src) {
+; CHECK-LABEL: bitcast_i64_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x i64> %src to <2 x i64>
+  ret <2 x i64> %r
+}
+
+define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_i32(<4 x i32> %src) {
+; CHECK-LABEL: bitcast_i64_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x i32> %src to <2 x i64>
+  ret <2 x i64> %r
+}
+
+define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_i16(<8 x i16> %src) {
+; CHECK-LABEL: bitcast_i64_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x i16> %src to <2 x i64>
+  ret <2 x i64> %r
+}
+
+define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_i8(<16 x i8> %src) {
+; CHECK-LABEL: bitcast_i64_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <16 x i8> %src to <2 x i64>
+  ret <2 x i64> %r
+}
+
+define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_f64(<2 x double> %src) {
+; CHECK-LABEL: bitcast_i64_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x double> %src to <2 x i64>
+  ret <2 x i64> %r
+}
+
+define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_f32(<4 x float> %src) {
+; CHECK-LABEL: bitcast_i64_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x float> %src to <2 x i64>
+  ret <2 x i64> %r
+}
+
+define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_f16(<8 x half> %src) {
+; CHECK-LABEL: bitcast_i64_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x half> %src to <2 x i64>
+  ret <2 x i64> %r
+}
+
+
+define arm_aapcs_vfpcc <4 x i32> @bitcast_i32_i64(<2 x i64> %src) {
+; CHECK-LABEL: bitcast_i32_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x i64> %src to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @bitcast_i32_i32(<4 x i32> %src) {
+; CHECK-LABEL: bitcast_i32_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x i32> %src to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @bitcast_i32_i16(<8 x i16> %src) {
+; CHECK-LABEL: bitcast_i32_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x i16> %src to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @bitcast_i32_i8(<16 x i8> %src) {
+; CHECK-LABEL: bitcast_i32_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <16 x i8> %src to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @bitcast_i32_f64(<2 x double> %src) {
+; CHECK-LABEL: bitcast_i32_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x double> %src to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @bitcast_i32_f32(<4 x float> %src) {
+; CHECK-LABEL: bitcast_i32_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x float> %src to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @bitcast_i32_f16(<8 x half> %src) {
+; CHECK-LABEL: bitcast_i32_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x half> %src to <4 x i32>
+  ret <4 x i32> %r
+}
+
+
+define arm_aapcs_vfpcc <8 x i16> @bitcast_i16_i64(<2 x i64> %src) {
+; CHECK-LABEL: bitcast_i16_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x i64> %src to <8 x i16>
+  ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @bitcast_i16_i32(<4 x i32> %src) {
+; CHECK-LABEL: bitcast_i16_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x i32> %src to <8 x i16>
+  ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @bitcast_i16_i16(<8 x i16> %src) {
+; CHECK-LABEL: bitcast_i16_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x i16> %src to <8 x i16>
+  ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @bitcast_i16_i8(<16 x i8> %src) {
+; CHECK-LABEL: bitcast_i16_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <16 x i8> %src to <8 x i16>
+  ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @bitcast_i16_f64(<2 x double> %src) {
+; CHECK-LABEL: bitcast_i16_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x double> %src to <8 x i16>
+  ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @bitcast_i16_f32(<4 x float> %src) {
+; CHECK-LABEL: bitcast_i16_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x float> %src to <8 x i16>
+  ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @bitcast_i16_f16(<8 x half> %src) {
+; CHECK-LABEL: bitcast_i16_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x half> %src to <8 x i16>
+  ret <8 x i16> %r
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @bitcast_i8_i64(<2 x i64> %src) {
+; CHECK-LABEL: bitcast_i8_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x i64> %src to <16 x i8>
+  ret <16 x i8> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @bitcast_i8_i32(<4 x i32> %src) {
+; CHECK-LABEL: bitcast_i8_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x i32> %src to <16 x i8>
+  ret <16 x i8> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @bitcast_i8_i16(<8 x i16> %src) {
+; CHECK-LABEL: bitcast_i8_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x i16> %src to <16 x i8>
+  ret <16 x i8> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @bitcast_i8_i8(<16 x i8> %src) {
+; CHECK-LABEL: bitcast_i8_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <16 x i8> %src to <16 x i8>
+  ret <16 x i8> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @bitcast_i8_f64(<2 x double> %src) {
+; CHECK-LABEL: bitcast_i8_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x double> %src to <16 x i8>
+  ret <16 x i8> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @bitcast_i8_f32(<4 x float> %src) {
+; CHECK-LABEL: bitcast_i8_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x float> %src to <16 x i8>
+  ret <16 x i8> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @bitcast_i8_f16(<8 x half> %src) {
+; CHECK-LABEL: bitcast_i8_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x half> %src to <16 x i8>
+  ret <16 x i8> %r
+}
+
+
+define arm_aapcs_vfpcc <2 x double> @bitcast_f64_i64(<2 x i64> %src) {
+; CHECK-LABEL: bitcast_f64_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x i64> %src to <2 x double>
+  ret <2 x double> %r
+}
+
+define arm_aapcs_vfpcc <2 x double> @bitcast_f64_i32(<4 x i32> %src) {
+; CHECK-LABEL: bitcast_f64_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x i32> %src to <2 x double>
+  ret <2 x double> %r
+}
+
+define arm_aapcs_vfpcc <2 x double> @bitcast_f64_i16(<8 x i16> %src) {
+; CHECK-LABEL: bitcast_f64_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x i16> %src to <2 x double>
+  ret <2 x double> %r
+}
+
+define arm_aapcs_vfpcc <2 x double> @bitcast_f64_i8(<16 x i8> %src) {
+; CHECK-LABEL: bitcast_f64_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <16 x i8> %src to <2 x double>
+  ret <2 x double> %r
+}
+
+define arm_aapcs_vfpcc <2 x double> @bitcast_f64_f64(<2 x double> %src) {
+; CHECK-LABEL: bitcast_f64_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x double> %src to <2 x double>
+  ret <2 x double> %r
+}
+
+define arm_aapcs_vfpcc <2 x double> @bitcast_f64_f32(<4 x float> %src) {
+; CHECK-LABEL: bitcast_f64_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x float> %src to <2 x double>
+  ret <2 x double> %r
+}
+
+define arm_aapcs_vfpcc <2 x double> @bitcast_f64_f16(<8 x half> %src) {
+; CHECK-LABEL: bitcast_f64_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x half> %src to <2 x double>
+  ret <2 x double> %r
+}
+
+
+define arm_aapcs_vfpcc <4 x float> @bitcast_f32_i64(<2 x i64> %src) {
+; CHECK-LABEL: bitcast_f32_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x i64> %src to <4 x float>
+  ret <4 x float> %r
+}
+
+define arm_aapcs_vfpcc <4 x float> @bitcast_f32_i32(<4 x i32> %src) {
+; CHECK-LABEL: bitcast_f32_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x i32> %src to <4 x float>
+  ret <4 x float> %r
+}
+
+define arm_aapcs_vfpcc <4 x float> @bitcast_f32_i16(<8 x i16> %src) {
+; CHECK-LABEL: bitcast_f32_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x i16> %src to <4 x float>
+  ret <4 x float> %r
+}
+
+define arm_aapcs_vfpcc <4 x float> @bitcast_f32_i8(<16 x i8> %src) {
+; CHECK-LABEL: bitcast_f32_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <16 x i8> %src to <4 x float>
+  ret <4 x float> %r
+}
+
+define arm_aapcs_vfpcc <4 x float> @bitcast_f32_f64(<2 x double> %src) {
+; CHECK-LABEL: bitcast_f32_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x double> %src to <4 x float>
+  ret <4 x float> %r
+}
+
+define arm_aapcs_vfpcc <4 x float> @bitcast_f32_f32(<4 x float> %src) {
+; CHECK-LABEL: bitcast_f32_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x float> %src to <4 x float>
+  ret <4 x float> %r
+}
+
+define arm_aapcs_vfpcc <4 x float> @bitcast_f32_f16(<8 x half> %src) {
+; CHECK-LABEL: bitcast_f32_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x half> %src to <4 x float>
+  ret <4 x float> %r
+}
+
+
+define arm_aapcs_vfpcc <8 x half> @bitcast_f16_i64(<2 x i64> %src) {
+; CHECK-LABEL: bitcast_f16_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x i64> %src to <8 x half>
+  ret <8 x half> %r
+}
+
+define arm_aapcs_vfpcc <8 x half> @bitcast_f16_i32(<4 x i32> %src) {
+; CHECK-LABEL: bitcast_f16_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x i32> %src to <8 x half>
+  ret <8 x half> %r
+}
+
+define arm_aapcs_vfpcc <8 x half> @bitcast_f16_i16(<8 x i16> %src) {
+; CHECK-LABEL: bitcast_f16_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x i16> %src to <8 x half>
+  ret <8 x half> %r
+}
+
+define arm_aapcs_vfpcc <8 x half> @bitcast_f16_i8(<16 x i8> %src) {
+; CHECK-LABEL: bitcast_f16_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <16 x i8> %src to <8 x half>
+  ret <8 x half> %r
+}
+
+define arm_aapcs_vfpcc <8 x half> @bitcast_f16_f64(<2 x double> %src) {
+; CHECK-LABEL: bitcast_f16_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <2 x double> %src to <8 x half>
+  ret <8 x half> %r
+}
+
+define arm_aapcs_vfpcc <8 x half> @bitcast_f16_f32(<4 x float> %src) {
+; CHECK-LABEL: bitcast_f16_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <4 x float> %src to <8 x half>
+  ret <8 x half> %r
+}
+
+define arm_aapcs_vfpcc <8 x half> @bitcast_f16_f16(<8 x half> %src) {
+; CHECK-LABEL: bitcast_f16_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %r = bitcast <8 x half> %src to <8 x half>
+  ret <8 x half> %r
+}

@@ -333,7 +333,7 @@ exit:
 ; CHECK-MVE: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
 ; CHECK-MVE: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK-MVE: br i1 [[CMP]], label %loop, label %exit
-define void @test_masked_i32(<4 x i1> %mask, <4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c, <4 x i32> %passthru) {
+define arm_aapcs_vfpcc void @test_masked_i32(<4 x i1> %mask, <4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c, <4 x i32> %passthru) {
 entry:
   br label %loop
 loop:

@@ -360,7 +360,7 @@ exit:
 ; CHECK-MVE: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
 ; CHECK-MVE: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK-MVE: br i1 [[CMP]], label %loop, label %exit
-define void @test_masked_f32(<4 x i1> %mask, <4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float> %passthru) {
+define arm_aapcs_vfpcc void @test_masked_f32(<4 x i1> %mask, <4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float> %passthru) {
 entry:
   br label %loop
 loop:

@@ -387,7 +387,7 @@ exit:
 ; CHECK-MVE: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
 ; CHECK-MVE: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK-MVE: br i1 [[CMP]], label %loop, label %exit
-define void @test_gather_scatter(<4 x i1> %mask, <4 x float*> %a, <4 x float*> %b, <4 x float*> %c, <4 x float> %passthru) {
+define arm_aapcs_vfpcc void @test_gather_scatter(<4 x i1> %mask, <4 x float*> %a, <4 x float*> %b, <4 x float*> %c, <4 x float> %passthru) {
 entry:
   br label %loop
 loop: