forked from OSchip/llvm-project
Implement Regression/CodeGen/X86/rotate.ll: emit rotate instructions (which
typically cost 1 cycle) instead of shld/shrd instruction (which are typically 6 or more cycles). This also saves code space. For example, instead of emitting: rotr: mov %EAX, DWORD PTR [%ESP + 4] mov %CL, BYTE PTR [%ESP + 8] shrd %EAX, %EAX, %CL ret rotli: mov %EAX, DWORD PTR [%ESP + 4] shrd %EAX, %EAX, 27 ret Emit: rotr32: mov %CL, BYTE PTR [%ESP + 8] mov %EAX, DWORD PTR [%ESP + 4] ror %EAX, %CL ret rotli32: mov %EAX, DWORD PTR [%ESP + 4] ror %EAX, 27 ret We also emit byte rotate instructions which do not have a sh[lr]d counterpart at all. llvm-svn: 19692
This commit is contained in:
parent
c4adfbbd0b
commit
de87d146ab
|
@ -1165,47 +1165,24 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) {
|
||||||
// Find out if ShrAmt = 32-ShlAmt or ShlAmt = 32-ShrAmt.
|
// Find out if ShrAmt = 32-ShlAmt or ShlAmt = 32-ShrAmt.
|
||||||
if (ShlAmt.getOpcode() == ISD::SUB && ShlAmt.getOperand(1) == ShrAmt)
|
if (ShlAmt.getOpcode() == ISD::SUB && ShlAmt.getOperand(1) == ShrAmt)
|
||||||
if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShlAmt.getOperand(0)))
|
if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShlAmt.getOperand(0)))
|
||||||
if (SubCST->getValue() == RegSize && RegSize != 8) {
|
if (SubCST->getValue() == RegSize) {
|
||||||
|
// (A >> ShrAmt) | (A << (32-ShrAmt)) ==> ROR A, ShrAmt
|
||||||
// (A >> ShrAmt) | (B << (32-ShrAmt)) ==> SHRD A, B, ShrAmt
|
// (A >> ShrAmt) | (B << (32-ShrAmt)) ==> SHRD A, B, ShrAmt
|
||||||
unsigned AReg, BReg;
|
if (ShrVal == ShlVal) {
|
||||||
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
|
unsigned Reg, ShAmt;
|
||||||
AReg = SelectExpr(ShrVal);
|
if (getRegPressure(ShrVal) > getRegPressure(ShrAmt)) {
|
||||||
BReg = SelectExpr(ShlVal);
|
Reg = SelectExpr(ShrVal);
|
||||||
} else {
|
ShAmt = SelectExpr(ShrAmt);
|
||||||
BReg = SelectExpr(ShlVal);
|
} else {
|
||||||
AReg = SelectExpr(ShrVal);
|
ShAmt = SelectExpr(ShrAmt);
|
||||||
}
|
Reg = SelectExpr(ShrVal);
|
||||||
unsigned ShAmt = SelectExpr(ShrAmt);
|
}
|
||||||
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
|
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
|
||||||
unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL;
|
unsigned Opc = RegSize == 8 ? X86::ROR8rCL :
|
||||||
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
|
(RegSize == 16 ? X86::ROR16rCL : X86::ROR32rCL);
|
||||||
return true;
|
BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
|
||||||
}
|
return true;
|
||||||
|
} else if (RegSize != 8) {
|
||||||
if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt)
|
|
||||||
if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShrAmt.getOperand(0)))
|
|
||||||
if (SubCST->getValue() == RegSize && RegSize != 8) {
|
|
||||||
// (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt
|
|
||||||
unsigned AReg, BReg;
|
|
||||||
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
|
|
||||||
AReg = SelectExpr(ShrVal);
|
|
||||||
BReg = SelectExpr(ShlVal);
|
|
||||||
} else {
|
|
||||||
BReg = SelectExpr(ShlVal);
|
|
||||||
AReg = SelectExpr(ShrVal);
|
|
||||||
}
|
|
||||||
unsigned ShAmt = SelectExpr(ShlAmt);
|
|
||||||
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
|
|
||||||
unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL;
|
|
||||||
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ConstantSDNode *ShrCst = dyn_cast<ConstantSDNode>(ShrAmt))
|
|
||||||
if (ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(ShlAmt))
|
|
||||||
if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize) {
|
|
||||||
if (ShrCst->getValue() == RegSize-ShlCst->getValue() && RegSize != 8) {
|
|
||||||
// (A >> 5) | (B << 27) --> SHRD A, B, 5
|
|
||||||
unsigned AReg, BReg;
|
unsigned AReg, BReg;
|
||||||
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
|
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
|
||||||
AReg = SelectExpr(ShrVal);
|
AReg = SelectExpr(ShrVal);
|
||||||
|
@ -1214,14 +1191,78 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) {
|
||||||
BReg = SelectExpr(ShlVal);
|
BReg = SelectExpr(ShlVal);
|
||||||
AReg = SelectExpr(ShrVal);
|
AReg = SelectExpr(ShrVal);
|
||||||
}
|
}
|
||||||
unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8;
|
unsigned ShAmt = SelectExpr(ShrAmt);
|
||||||
BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg)
|
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
|
||||||
.addImm(ShrCst->getValue());
|
unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL;
|
||||||
|
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt)
|
||||||
|
if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShrAmt.getOperand(0)))
|
||||||
|
if (SubCST->getValue() == RegSize) {
|
||||||
|
// (A << ShlAmt) | (A >> (32-ShlAmt)) ==> ROL A, ShrAmt
|
||||||
|
// (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt
|
||||||
|
if (ShrVal == ShlVal) {
|
||||||
|
unsigned Reg, ShAmt;
|
||||||
|
if (getRegPressure(ShrVal) > getRegPressure(ShlAmt)) {
|
||||||
|
Reg = SelectExpr(ShrVal);
|
||||||
|
ShAmt = SelectExpr(ShlAmt);
|
||||||
|
} else {
|
||||||
|
ShAmt = SelectExpr(ShlAmt);
|
||||||
|
Reg = SelectExpr(ShrVal);
|
||||||
|
}
|
||||||
|
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
|
||||||
|
unsigned Opc = RegSize == 8 ? X86::ROL8rCL :
|
||||||
|
(RegSize == 16 ? X86::ROL16rCL : X86::ROL32rCL);
|
||||||
|
BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
|
||||||
|
return true;
|
||||||
|
} else if (RegSize != 8) {
|
||||||
|
unsigned AReg, BReg;
|
||||||
|
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
|
||||||
|
AReg = SelectExpr(ShrVal);
|
||||||
|
BReg = SelectExpr(ShlVal);
|
||||||
|
} else {
|
||||||
|
BReg = SelectExpr(ShlVal);
|
||||||
|
AReg = SelectExpr(ShrVal);
|
||||||
|
}
|
||||||
|
unsigned ShAmt = SelectExpr(ShlAmt);
|
||||||
|
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
|
||||||
|
unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL;
|
||||||
|
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ConstantSDNode *ShrCst = dyn_cast<ConstantSDNode>(ShrAmt))
|
||||||
|
if (ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(ShlAmt))
|
||||||
|
if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize)
|
||||||
|
if (ShrCst->getValue() == RegSize-ShlCst->getValue()) {
|
||||||
|
// (A >> 5) | (A << 27) --> ROR A, 5
|
||||||
|
// (A >> 5) | (B << 27) --> SHRD A, B, 5
|
||||||
|
if (ShrVal == ShlVal) {
|
||||||
|
unsigned Reg = SelectExpr(ShrVal);
|
||||||
|
unsigned Opc = RegSize == 8 ? X86::ROR8ri :
|
||||||
|
(RegSize == 16 ? X86::ROR16ri : X86::ROR32ri);
|
||||||
|
BuildMI(BB, Opc, 2, DestReg).addReg(Reg).addImm(ShrCst->getValue());
|
||||||
|
return true;
|
||||||
|
} else if (RegSize != 8) {
|
||||||
|
unsigned AReg, BReg;
|
||||||
|
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
|
||||||
|
AReg = SelectExpr(ShrVal);
|
||||||
|
BReg = SelectExpr(ShlVal);
|
||||||
|
} else {
|
||||||
|
BReg = SelectExpr(ShlVal);
|
||||||
|
AReg = SelectExpr(ShrVal);
|
||||||
|
}
|
||||||
|
unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8;
|
||||||
|
BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg)
|
||||||
|
.addImm(ShrCst->getValue());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue