forked from OSchip/llvm-project
Revert r163298 "Optimize codegen for VSETLNi{8,16,32} operating on Q registers."
Keep the integer_insertelement test case; the new coalescer can handle this kind of lane insertion without help from pseudo-instructions. llvm-svn: 166835
This commit is contained in:
parent
271fbb6445
commit
1f06e7f00e
|
@ -1208,57 +1208,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
|||
ExpandLaneOp(MBBI);
|
||||
return true;
|
||||
|
||||
case ARM::VSETLNi8Q:
|
||||
case ARM::VSETLNi16Q: {
|
||||
// Expand VSETLNs acting on a Q register to equivalent VSETLNs acting
|
||||
// on the respective D register.
|
||||
|
||||
unsigned QReg = MI.getOperand(1).getReg();
|
||||
unsigned QLane = MI.getOperand(3).getImm();
|
||||
|
||||
unsigned NewOpcode, DLane, DSubReg;
|
||||
switch (Opcode) {
|
||||
default: llvm_unreachable("Invalid opcode!");
|
||||
case ARM::VSETLNi8Q:
|
||||
// 8 possible 8-bit lanes per DPR.
|
||||
NewOpcode = ARM::VSETLNi8;
|
||||
DLane = QLane % 8;
|
||||
DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0;
|
||||
break;
|
||||
case ARM::VSETLNi16Q:
|
||||
// 4 possible 16-bit lanes per DPR.
|
||||
NewOpcode = ARM::VSETLNi16;
|
||||
DLane = QLane % 4;
|
||||
DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0;
|
||||
break;
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode));
|
||||
|
||||
unsigned DReg = TRI->getSubReg(QReg, DSubReg);
|
||||
|
||||
MIB.addReg(DReg, RegState::Define); // Output DPR
|
||||
MIB.addReg(DReg); // Input DPR
|
||||
MIB.addOperand(MI.getOperand(2)); // Input GPR
|
||||
MIB.addImm(DLane); // Lane
|
||||
|
||||
// Add the predicate operands.
|
||||
MIB.addOperand(MI.getOperand(4));
|
||||
MIB.addOperand(MI.getOperand(5));
|
||||
|
||||
if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register.
|
||||
MIB->addRegisterKilled(QReg, TRI, true);
|
||||
// And an implicit def of the output register (which should always be the
|
||||
// same as the input register).
|
||||
MIB->addRegisterDefined(QReg, TRI);
|
||||
|
||||
TransferImpOps(MI, MIB, MIB);
|
||||
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
|
||||
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
|
||||
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
|
||||
|
|
|
@ -5140,23 +5140,25 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
|
|||
GPR:$R, imm:$lane))]> {
|
||||
let Inst{21} = lane{0};
|
||||
}
|
||||
|
||||
def VSETLNi8Q : PseudoNeonI<(outs QPR:$V),
|
||||
(ins QPR:$src1, GPR:$R, VectorIndex8:$lane),
|
||||
IIC_VMOVISL, "",
|
||||
[(set QPR:$V, (vector_insert (v16i8 QPR:$src1),
|
||||
GPR:$R, imm:$lane))]>;
|
||||
def VSETLNi16Q : PseudoNeonI<(outs QPR:$V),
|
||||
(ins QPR:$src1, GPR:$R, VectorIndex16:$lane),
|
||||
IIC_VMOVISL, "",
|
||||
[(set QPR:$V, (vector_insert (v8i16 QPR:$src1),
|
||||
GPR:$R, imm:$lane))]>;
|
||||
}
|
||||
|
||||
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
|
||||
(v16i8 (INSERT_SUBREG QPR:$src1,
|
||||
(v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
|
||||
(DSubReg_i8_reg imm:$lane))),
|
||||
GPR:$src2, (SubReg_i8_lane imm:$lane))),
|
||||
(DSubReg_i8_reg imm:$lane)))>;
|
||||
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
|
||||
(v8i16 (INSERT_SUBREG QPR:$src1,
|
||||
(v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
|
||||
(DSubReg_i16_reg imm:$lane))),
|
||||
GPR:$src2, (SubReg_i16_lane imm:$lane))),
|
||||
(DSubReg_i16_reg imm:$lane)))>;
|
||||
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
|
||||
(v4i32 (INSERT_SUBREG QPR:$src1,
|
||||
GPR:$src2,
|
||||
(SSubReg_f32_reg imm:$lane)))>;
|
||||
(v4i32 (INSERT_SUBREG QPR:$src1,
|
||||
(v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
|
||||
(DSubReg_i32_reg imm:$lane))),
|
||||
GPR:$src2, (SubReg_i32_lane imm:$lane))),
|
||||
(DSubReg_i32_reg imm:$lane)))>;
|
||||
|
||||
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
|
||||
(INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
; CHECK: @f
|
||||
; CHECK-NOT: vorr d
|
||||
; CHECK: vmov s
|
||||
; CHECK: vmov.32 d
|
||||
; CHECK-NOT: vorr d
|
||||
; CHECK: mov pc, lr
|
||||
define <4 x i32> @f(<4 x i32> %in) {
|
||||
|
|
|
@ -200,7 +200,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
|
|||
|
||||
define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
|
||||
;CHECK: vsetQ_lane32:
|
||||
;CHECK: vmov s
|
||||
;CHECK: vmov.32 d{{.*}}[1], r1
|
||||
%tmp1 = load <4 x i32>* %A
|
||||
%tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
|
||||
ret <4 x i32> %tmp2
|
||||
|
|
Loading…
Reference in New Issue