forked from OSchip/llvm-project
ARM: use NEON loads and stores if possible when handling struct byval.
This change is to be enabled in clang. rdar://9877866 llvm-svn: 158684
This commit is contained in:
parent
28cd12f265
commit
6e1fd46fdf
|
@ -6260,11 +6260,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
|
||||||
bool isThumb2 = Subtarget->isThumb2();
|
bool isThumb2 = Subtarget->isThumb2();
|
||||||
MachineFunction *MF = BB->getParent();
|
MachineFunction *MF = BB->getParent();
|
||||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||||
unsigned ldrOpc, strOpc, UnitSize;
|
unsigned ldrOpc, strOpc, UnitSize = 0;
|
||||||
|
|
||||||
const TargetRegisterClass *TRC = isThumb2 ?
|
const TargetRegisterClass *TRC = isThumb2 ?
|
||||||
(const TargetRegisterClass*)&ARM::tGPRRegClass :
|
(const TargetRegisterClass*)&ARM::tGPRRegClass :
|
||||||
(const TargetRegisterClass*)&ARM::GPRRegClass;
|
(const TargetRegisterClass*)&ARM::GPRRegClass;
|
||||||
|
const TargetRegisterClass *TRC_Vec = 0;
|
||||||
|
|
||||||
if (Align & 1) {
|
if (Align & 1) {
|
||||||
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
|
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
|
||||||
|
@ -6275,10 +6276,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
|
||||||
strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
|
strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
|
||||||
UnitSize = 2;
|
UnitSize = 2;
|
||||||
} else {
|
} else {
|
||||||
ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
|
// Check whether we can use NEON instructions.
|
||||||
strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
|
if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
|
||||||
UnitSize = 4;
|
Subtarget->hasNEON()) {
|
||||||
|
if ((Align % 16 == 0) && SizeVal >= 16) {
|
||||||
|
ldrOpc = ARM::VLD1q32wb_fixed;
|
||||||
|
strOpc = ARM::VST1q32wb_fixed;
|
||||||
|
UnitSize = 16;
|
||||||
|
TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
|
||||||
|
}
|
||||||
|
else if ((Align % 8 == 0) && SizeVal >= 8) {
|
||||||
|
ldrOpc = ARM::VLD1d32wb_fixed;
|
||||||
|
strOpc = ARM::VST1d32wb_fixed;
|
||||||
|
UnitSize = 8;
|
||||||
|
TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Can't use NEON instructions.
|
||||||
|
if (UnitSize == 0) {
|
||||||
|
ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
|
||||||
|
strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
|
||||||
|
UnitSize = 4;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned BytesLeft = SizeVal % UnitSize;
|
unsigned BytesLeft = SizeVal % UnitSize;
|
||||||
unsigned LoopSize = SizeVal - BytesLeft;
|
unsigned LoopSize = SizeVal - BytesLeft;
|
||||||
|
|
||||||
|
@ -6289,10 +6310,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
|
||||||
unsigned srcIn = src;
|
unsigned srcIn = src;
|
||||||
unsigned destIn = dest;
|
unsigned destIn = dest;
|
||||||
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
|
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
|
||||||
unsigned scratch = MRI.createVirtualRegister(TRC);
|
unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
|
||||||
unsigned srcOut = MRI.createVirtualRegister(TRC);
|
unsigned srcOut = MRI.createVirtualRegister(TRC);
|
||||||
unsigned destOut = MRI.createVirtualRegister(TRC);
|
unsigned destOut = MRI.createVirtualRegister(TRC);
|
||||||
if (isThumb2) {
|
if (UnitSize >= 8) {
|
||||||
|
AddDefaultPred(BuildMI(*BB, MI, dl,
|
||||||
|
TII->get(ldrOpc), scratch)
|
||||||
|
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
|
||||||
|
|
||||||
|
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
|
||||||
|
.addReg(destIn).addImm(0).addReg(scratch));
|
||||||
|
} else if (isThumb2) {
|
||||||
AddDefaultPred(BuildMI(*BB, MI, dl,
|
AddDefaultPred(BuildMI(*BB, MI, dl,
|
||||||
TII->get(ldrOpc), scratch)
|
TII->get(ldrOpc), scratch)
|
||||||
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
|
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
|
||||||
|
@ -6434,8 +6462,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
|
||||||
|
|
||||||
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
|
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
|
||||||
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
|
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
|
||||||
unsigned scratch = MRI.createVirtualRegister(TRC);
|
unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
|
||||||
if (isThumb2) {
|
if (UnitSize >= 8) {
|
||||||
|
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
|
||||||
|
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
|
||||||
|
|
||||||
|
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
|
||||||
|
.addReg(destPhi).addImm(0).addReg(scratch));
|
||||||
|
} else if (isThumb2) {
|
||||||
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
|
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
|
||||||
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
|
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
|
||||||
|
|
||||||
|
|
|
@ -28,5 +28,19 @@ entry:
|
||||||
ret i32 0
|
ret i32 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Generate a loop using NEON instructions
|
||||||
|
define i32 @h() nounwind ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: h:
|
||||||
|
; CHECK: vld1
|
||||||
|
; CHECK: sub
|
||||||
|
; CHECK: vst1
|
||||||
|
; CHECK: bne
|
||||||
|
%st = alloca %struct.LargeStruct, align 16
|
||||||
|
%call = call i32 @e3(%struct.LargeStruct* byval align 16 %st)
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind
|
declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind
|
||||||
declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind
|
declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind
|
||||||
|
declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind
|
||||||
|
|
Loading…
Reference in New Issue