forked from OSchip/llvm-project
Change VST1 instructions for storing Q register values to operate on pairs
of D registers. Add a separate VST1q instruction with a Q register source operand for use by storeRegToStackSlot. llvm-svn: 99265
This commit is contained in:
parent
3f7842232e
commit
cc0a2a75a0
|
@ -738,7 +738,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||
RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
|
||||
// FIXME: Neon instructions should support predicates
|
||||
if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
|
||||
.addFrameIndex(FI).addImm(128)
|
||||
.addMemOperand(MMO)
|
||||
.addReg(SrcReg, getKillRegState(isKill)));
|
||||
|
|
|
@ -132,9 +132,9 @@ private:
|
|||
unsigned *QOpcodes0, unsigned *QOpcodes1);
|
||||
|
||||
/// SelectVST - Select NEON store intrinsics. NumVecs should
|
||||
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
|
||||
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
|
||||
/// stores of D registers and even subregs and odd subregs of Q registers.
|
||||
/// For NumVecs == 2, QOpcodes1 is not used.
|
||||
/// For NumVecs <= 2, QOpcodes1 is not used.
|
||||
SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
|
||||
unsigned *QOpcodes0, unsigned *QOpcodes1);
|
||||
|
||||
|
@ -1048,7 +1048,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
|||
case MVT::v4f32:
|
||||
case MVT::v4i32: OpcodeIndex = 2; break;
|
||||
case MVT::v2i64: OpcodeIndex = 3;
|
||||
assert(NumVecs == 1 && "v2i64 type only supported for VLD1/VST1");
|
||||
assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1112,7 +1112,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
|||
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
unsigned *DOpcodes, unsigned *QOpcodes0,
|
||||
unsigned *QOpcodes1) {
|
||||
assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
|
||||
assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
SDValue MemAddr, Align;
|
||||
|
@ -1137,6 +1137,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||
case MVT::v8i16: OpcodeIndex = 1; break;
|
||||
case MVT::v4f32:
|
||||
case MVT::v4i32: OpcodeIndex = 2; break;
|
||||
case MVT::v2i64: OpcodeIndex = 3;
|
||||
assert(NumVecs == 1 && "v2i64 type only supported for VST1");
|
||||
break;
|
||||
}
|
||||
|
||||
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
|
||||
|
@ -1157,9 +1160,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||
}
|
||||
|
||||
EVT RegVT = GetNEONSubregVT(VT);
|
||||
if (NumVecs == 2) {
|
||||
// Quad registers are directly supported for VST2,
|
||||
// storing 2 pairs of D regs.
|
||||
if (NumVecs <= 2) {
|
||||
// Quad registers are directly supported for VST1 and VST2,
|
||||
// storing pairs of D regs.
|
||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
|
||||
|
@ -1170,7 +1173,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0); // predicate register
|
||||
Ops.push_back(Chain);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
|
||||
5 + 2 * NumVecs);
|
||||
}
|
||||
|
||||
// Otherwise, quad registers are stored with two separate instructions,
|
||||
|
@ -1894,9 +1898,17 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst1: {
|
||||
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
|
||||
ARM::VST1d32, ARM::VST1d64 };
|
||||
unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
|
||||
ARM::VST1q32, ARM::VST1q64 };
|
||||
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst2: {
|
||||
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
|
||||
ARM::VST2d32, ARM::VST2d64 };
|
||||
ARM::VST2d32, ARM::VST1q64 };
|
||||
unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
|
||||
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
|
|
@ -141,6 +141,7 @@ def VLD1q_UPD
|
|||
} // mayLoad = 1
|
||||
|
||||
// Use vstmia to store a Q register as a D register pair.
|
||||
// This is equivalent to VSTMD except that it has a Q register operand.
|
||||
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
|
||||
"vstmia", "$addr, ${src:dregpair}",
|
||||
[(store (v2f64 QPR:$src), addrmode4:$addr)]> {
|
||||
|
@ -151,6 +152,20 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
|
|||
let Inst{11-8} = 0b1011;
|
||||
}
|
||||
|
||||
let mayStore = 1 in {
|
||||
// Use vst1 to store a Q register as a D register pair.
|
||||
// This alternative to VSTRQ allows an alignment to be specified.
|
||||
// This is equivalent to VST1q64 except that it has a Q register operand.
|
||||
def VST1q
|
||||
: NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
|
||||
IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
|
||||
// Address-writeback form of VST1q: takes the same Q register source operand
// (printed as a D register pair via the dregpair modifier) plus an
// am6offset operand, and ties the updated address to the $wb output.
def VST1q_UPD
  : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
          IIC_VST, "vst1", "64", "${src:dregpair}, $addr$offset",
          "$addr.addr = $wb", []>;
|
||||
} // mayStore = 1
|
||||
|
||||
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
|
||||
|
||||
// VLD1 : Vector Load (multiple single elements)
|
||||
|
@ -477,32 +492,27 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
|
|||
// FIXME: Not yet implemented.
|
||||
} // mayLoad = 1, hasExtraDefRegAllocReq = 1
|
||||
|
||||
// VST1 : Vector Store (multiple single elements)
|
||||
class VST1D<bits<4> op7_4, string Dt, ValueType Ty>
|
||||
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
|
||||
"vst1", Dt, "\\{$src\\}, $addr", "",
|
||||
[(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>;
|
||||
class VST1Q<bits<4> op7_4, string Dt, ValueType Ty>
|
||||
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
|
||||
"vst1", Dt, "${src:dregpair}, $addr", "",
|
||||
[(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>;
|
||||
|
||||
let hasExtraSrcRegAllocReq = 1 in {
|
||||
def VST1d8 : VST1D<0b0000, "8", v8i8>;
|
||||
def VST1d16 : VST1D<0b0100, "16", v4i16>;
|
||||
def VST1d32 : VST1D<0b1000, "32", v2i32>;
|
||||
def VST1df : VST1D<0b1000, "32", v2f32>;
|
||||
def VST1d64 : VST1D<0b1100, "64", v1i64>;
|
||||
|
||||
def VST1q8 : VST1Q<0b0000, "8", v16i8>;
|
||||
def VST1q16 : VST1Q<0b0100, "16", v8i16>;
|
||||
def VST1q32 : VST1Q<0b1000, "32", v4i32>;
|
||||
def VST1qf : VST1Q<0b1000, "32", v4f32>;
|
||||
def VST1q64 : VST1Q<0b1100, "64", v2i64>;
|
||||
} // hasExtraSrcRegAllocReq
|
||||
|
||||
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||
|
||||
// VST1 : Vector Store (multiple single elements)
|
||||
class VST1D<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
|
||||
"vst1", Dt, "\\{$src\\}, $addr", "", []>;
|
||||
class VST1Q<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0,0b00,0b1010,op7_4, (outs),
|
||||
(ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
|
||||
"vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
|
||||
|
||||
def VST1d8 : VST1D<0b0000, "8">;
|
||||
def VST1d16 : VST1D<0b0100, "16">;
|
||||
def VST1d32 : VST1D<0b1000, "32">;
|
||||
def VST1d64 : VST1D<0b1100, "64">;
|
||||
|
||||
def VST1q8 : VST1Q<0b0000, "8">;
|
||||
def VST1q16 : VST1Q<0b0100, "16">;
|
||||
def VST1q32 : VST1Q<0b1000, "32">;
|
||||
def VST1q64 : VST1Q<0b1100, "64">;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST1DWB<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
|
||||
|
@ -582,9 +592,6 @@ class VST2Q<bits<4> op7_4, string Dt>
|
|||
def VST2d8 : VST2D<0b1000, 0b0000, "8">;
|
||||
def VST2d16 : VST2D<0b1000, 0b0100, "16">;
|
||||
def VST2d32 : VST2D<0b1000, 0b1000, "32">;
|
||||
def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
|
||||
(ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
|
||||
"vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>;
|
||||
|
||||
def VST2q8 : VST2Q<0b0000, "8">;
|
||||
def VST2q16 : VST2Q<0b0100, "16">;
|
||||
|
@ -606,11 +613,6 @@ class VST2QWB<bits<4> op7_4, string Dt>
|
|||
def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">;
|
||||
def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
|
||||
def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
|
||||
def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset,
|
||||
DPR:$src1, DPR:$src2), IIC_VST,
|
||||
"vst1", "64", "\\{$src1, $src2\\}, $addr$offset",
|
||||
"$addr.addr = $wb", []>;
|
||||
|
||||
def VST2q8_UPD : VST2QWB<0b0000, "8">;
|
||||
def VST2q16_UPD : VST2QWB<0b0100, "16">;
|
||||
|
|
|
@ -50,10 +50,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||
case ARM::VLD1q16:
|
||||
case ARM::VLD1q32:
|
||||
case ARM::VLD1q64:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD2d8:
|
||||
case ARM::VLD2d16:
|
||||
case ARM::VLD2d32:
|
||||
|
@ -177,10 +173,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST1q8:
|
||||
case ARM::VST1q16:
|
||||
case ARM::VST1q32:
|
||||
case ARM::VST1q64:
|
||||
case ARM::VST2d8:
|
||||
case ARM::VST2d16:
|
||||
case ARM::VST2d32:
|
||||
case ARM::VST2d64:
|
||||
case ARM::VST2LNd8:
|
||||
case ARM::VST2LNd16:
|
||||
case ARM::VST2LNd32:
|
||||
|
|
Loading…
Reference in New Issue