forked from OSchip/llvm-project
[x86] clean up code for converting 16-bit ops to LEA; NFC
As discussed in D55494, we want to extend this to handle 8-bit ops too, but that could be extended further to enable this on 32-bit systems too.

llvm-svn: 348851
This commit is contained in:
parent
17b65c0d58
commit
05e36982dd
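For context, the conversion this helper performs turns a two-address 16-bit op into a three-address LEA: the 16-bit value is inserted into the low bits of a wider, otherwise-undef virtual register, the arithmetic is done with LEA, and the low 16 bits are copied back out. A rough sketch of the resulting code shape in 64-bit mode (registers and the constant are purely illustrative, not taken from this patch):

    # before: two-address 16-bit add
    addw    $8, %ax
    # after: promoted through a wider register and an LEA
    movw    %ax, %cx        # insert into the low 16 bits of an undef wider reg
    leal    8(%rcx), %edx   # LEA64_32r forms the 3-address add
    movw    %dx, %ax        # extract the low 16 bits back into the 16-bit dest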
@@ -794,90 +794,90 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
   return true;
 }
 
-/// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
-/// LEA to form 3-address code by promoting to a 32-bit superregister and then
-/// truncating back down to a 16-bit subregister.
 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
     unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
     LiveVariables *LV) const {
-  MachineBasicBlock::iterator MBBI = MI.getIterator();
-  unsigned Dest = MI.getOperand(0).getReg();
-  unsigned Src = MI.getOperand(1).getReg();
-  bool isDead = MI.getOperand(0).isDead();
-  bool isKill = MI.getOperand(1).isKill();
-  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
+  // TODO: For a 32-bit target, we need to adjust the LEA variables with
+  // something like this:
+  //   Opcode = X86::LEA32r;
+  //   InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+  //   OutRegLEA =
+  //       Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
+  //                : RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  if (!Subtarget.is64Bit())
+    return nullptr;
 
   MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-  unsigned Opc, leaInReg;
-  if (Subtarget.is64Bit()) {
-    Opc = X86::LEA64_32r;
-    leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
-  } else {
-    Opc = X86::LEA32r;
-    leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
-  }
+  unsigned Opcode = X86::LEA64_32r;
+  unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+  unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
 
   // Build and insert into an implicit UNDEF value. This is OK because
-  // well be shifting and then extracting the lower 16-bits.
+  // we will be shifting and then extracting the lower 16-bits.
   // This has the potential to cause partial register stall. e.g.
   //   movw    (%rbp,%rcx,2), %dx
   //   leal    -65(%rdx), %esi
   // But testing has shown this *does* help performance in 64-bit mode (at
   // least on modern x86 machines).
-  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+  MachineBasicBlock::iterator MBBI = MI.getIterator();
+  unsigned Dest = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
+  bool IsDead = MI.getOperand(0).isDead();
+  bool IsKill = MI.getOperand(1).isKill();
+  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
+  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
   MachineInstr *InsMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
-          .addReg(leaInReg, RegState::Define, X86::sub_16bit)
-          .addReg(Src, getKillRegState(isKill));
+          .addReg(InRegLEA, RegState::Define, X86::sub_16bit)
+          .addReg(Src, getKillRegState(IsKill));
 
   MachineInstrBuilder MIB =
-      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opc), leaOutReg);
+      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
   switch (MIOpc) {
   default: llvm_unreachable("Unreachable!");
   case X86::SHL16ri: {
     unsigned ShAmt = MI.getOperand(2).getImm();
     MIB.addReg(0).addImm(1ULL << ShAmt)
-       .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+       .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
     break;
   }
   case X86::INC16r:
-    addRegOffset(MIB, leaInReg, true, 1);
+    addRegOffset(MIB, InRegLEA, true, 1);
     break;
   case X86::DEC16r:
-    addRegOffset(MIB, leaInReg, true, -1);
+    addRegOffset(MIB, InRegLEA, true, -1);
     break;
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
-    addRegOffset(MIB, leaInReg, true, MI.getOperand(2).getImm());
+    addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
     break;
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
     unsigned Src2 = MI.getOperand(2).getReg();
-    bool isKill2 = MI.getOperand(2).isKill();
+    bool IsKill2 = MI.getOperand(2).isKill();
     assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
-    unsigned leaInReg2 = 0;
+    unsigned InRegLEA2 = 0;
     MachineInstr *InsMI2 = nullptr;
     if (Src == Src2) {
       // ADD16rr killed %reg1028, %reg1028
       // just a single insert_subreg.
-      addRegReg(MIB, leaInReg, true, leaInReg, false);
+      addRegReg(MIB, InRegLEA, true, InRegLEA, false);
     } else {
       if (Subtarget.is64Bit())
-        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
       else
-        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
       // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits.
-      BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+      // we will be shifting and then extracting the lower 16-bits.
+      BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
       InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
-                   .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
-                   .addReg(Src2, getKillRegState(isKill2));
-      addRegReg(MIB, leaInReg, true, leaInReg2, true);
+                   .addReg(InRegLEA2, RegState::Define, X86::sub_16bit)
+                   .addReg(Src2, getKillRegState(IsKill2));
+      addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
     }
-    if (LV && isKill2 && InsMI2)
+    if (LV && IsKill2 && InsMI2)
       LV->replaceKillInstruction(Src2, MI, *InsMI2);
     break;
   }
@@ -886,16 +886,16 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
   MachineInstr *NewMI = MIB;
   MachineInstr *ExtMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
-          .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-          .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+          .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
+          .addReg(OutRegLEA, RegState::Kill, X86::sub_16bit);
 
   if (LV) {
-    // Update live variables
-    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
-    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
-    if (isKill)
+    // Update live variables.
+    LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
+    LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
+    if (IsKill)
       LV->replaceKillInstruction(Src, MI, *InsMI);
-    if (isDead)
+    if (IsDead)
       LV->replaceKillInstruction(Dest, MI, *ExtMI);
   }
 
@@ -937,7 +937,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     return nullptr;
 
   MachineInstr *NewMI = nullptr;
-  bool is64Bit = Subtarget.is64Bit();
+  bool Is64Bit = Subtarget.is64Bit();
 
   unsigned MIOpc = MI.getOpcode();
   switch (MIOpc) {
@@ -967,7 +967,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = getTruncatedShiftCount(MI, 2);
     if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
 
-    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
 
     // LEA can't handle ESP.
     bool isKill;
@@ -996,14 +996,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = getTruncatedShiftCount(MI, 2);
     if (!isTruncatedShiftCountForLEA(ShAmt))
       return nullptr;
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   }
   case X86::INC64r:
   case X86::INC32r: {
     assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
-    unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
-        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+    unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
+        (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
     bool isKill;
     unsigned SrcReg;
     MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
@@ -1022,13 +1021,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     break;
   }
   case X86::INC16r:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::DEC64r:
   case X86::DEC32r: {
     assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
     unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
-        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+        : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
 
     bool isKill;
     unsigned SrcReg;
@@ -1048,8 +1046,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     break;
   }
   case X86::DEC16r:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::ADD64rr:
   case X86::ADD64rr_DB:
   case X86::ADD32rr:
@@ -1059,7 +1056,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
       Opc = X86::LEA64r;
     else
-      Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+      Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
 
     bool isKill;
     unsigned SrcReg;
@@ -1089,8 +1086,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   }
   case X86::ADD16rr:
   case X86::ADD16rr_DB:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::ADD64ri32:
   case X86::ADD64ri8:
   case X86::ADD64ri32_DB:
@@ -1105,7 +1101,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD32ri_DB:
   case X86::ADD32ri8_DB: {
     assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
-    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
 
     bool isKill;
     unsigned SrcReg;
@@ -1127,8 +1123,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
-    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
-                   : nullptr;
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::VMOVDQU8Z128rmk:
   case X86::VMOVDQU8Z256rmk:
   case X86::VMOVDQU8Zrmk:
@@ -584,6 +584,9 @@ protected:
                               const MachineOperand *&Destination) const override;
 
 private:
+  /// This is a helper for convertToThreeAddress for 16-bit instructions.
+  /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
+  /// super-register and then truncating back down to a 16-bit sub-register.
   MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
                                              MachineFunction::iterator &MFI,
                                              MachineInstr &MI,