Implement Regression/CodeGen/X86/rotate.ll: emit rotate instructions (which

typically cost 1 cycle) instead of shld/shrd instruction (which are typically
6 or more cycles).  This also saves code space.

For example, instead of emitting:

rotr:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %CL, BYTE PTR [%ESP + 8]
        shrd %EAX, %EAX, %CL
        ret
rotli:
        mov %EAX, DWORD PTR [%ESP + 4]
        shrd %EAX, %EAX, 27
        ret

Emit:

rotr32:
        mov %CL, BYTE PTR [%ESP + 8]
        mov %EAX, DWORD PTR [%ESP + 4]
        ror %EAX, %CL
        ret
rotli32:
        mov %EAX, DWORD PTR [%ESP + 4]
        ror %EAX, 27
        ret

We also emit byte rotate instructions which do not have a sh[lr]d counterpart
at all.

llvm-svn: 19692
This commit is contained in:
Chris Lattner 2005-01-19 08:07:05 +00:00
parent c4adfbbd0b
commit de87d146ab
1 changed files with 84 additions and 43 deletions

View File

@ -1165,47 +1165,24 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) {
// Find out if ShrAmt = 32-ShlAmt or ShlAmt = 32-ShrAmt.
if (ShlAmt.getOpcode() == ISD::SUB && ShlAmt.getOperand(1) == ShrAmt)
if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShlAmt.getOperand(0)))
if (SubCST->getValue() == RegSize && RegSize != 8) {
if (SubCST->getValue() == RegSize) {
// (A >> ShrAmt) | (A << (32-ShrAmt)) ==> ROR A, ShrAmt
// (A >> ShrAmt) | (B << (32-ShrAmt)) ==> SHRD A, B, ShrAmt
unsigned AReg, BReg;
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
AReg = SelectExpr(ShrVal);
BReg = SelectExpr(ShlVal);
} else {
BReg = SelectExpr(ShlVal);
AReg = SelectExpr(ShrVal);
}
unsigned ShAmt = SelectExpr(ShrAmt);
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL;
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
return true;
}
if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt)
if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShrAmt.getOperand(0)))
if (SubCST->getValue() == RegSize && RegSize != 8) {
// (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt
unsigned AReg, BReg;
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
AReg = SelectExpr(ShrVal);
BReg = SelectExpr(ShlVal);
} else {
BReg = SelectExpr(ShlVal);
AReg = SelectExpr(ShrVal);
}
unsigned ShAmt = SelectExpr(ShlAmt);
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL;
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
return true;
}
if (ConstantSDNode *ShrCst = dyn_cast<ConstantSDNode>(ShrAmt))
if (ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(ShlAmt))
if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize) {
if (ShrCst->getValue() == RegSize-ShlCst->getValue() && RegSize != 8) {
// (A >> 5) | (B << 27) --> SHRD A, B, 5
if (ShrVal == ShlVal) {
unsigned Reg, ShAmt;
if (getRegPressure(ShrVal) > getRegPressure(ShrAmt)) {
Reg = SelectExpr(ShrVal);
ShAmt = SelectExpr(ShrAmt);
} else {
ShAmt = SelectExpr(ShrAmt);
Reg = SelectExpr(ShrVal);
}
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
unsigned Opc = RegSize == 8 ? X86::ROR8rCL :
(RegSize == 16 ? X86::ROR16rCL : X86::ROR32rCL);
BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
return true;
} else if (RegSize != 8) {
unsigned AReg, BReg;
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
AReg = SelectExpr(ShrVal);
@ -1214,14 +1191,78 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) {
BReg = SelectExpr(ShlVal);
AReg = SelectExpr(ShrVal);
}
unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8;
BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg)
.addImm(ShrCst->getValue());
unsigned ShAmt = SelectExpr(ShrAmt);
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL;
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
return true;
}
}
if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt)
if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShrAmt.getOperand(0)))
if (SubCST->getValue() == RegSize) {
// (A << ShlAmt) | (A >> (32-ShlAmt)) ==> ROL A, ShrAmt
// (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt
if (ShrVal == ShlVal) {
unsigned Reg, ShAmt;
if (getRegPressure(ShrVal) > getRegPressure(ShlAmt)) {
Reg = SelectExpr(ShrVal);
ShAmt = SelectExpr(ShlAmt);
} else {
ShAmt = SelectExpr(ShlAmt);
Reg = SelectExpr(ShrVal);
}
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
unsigned Opc = RegSize == 8 ? X86::ROL8rCL :
(RegSize == 16 ? X86::ROL16rCL : X86::ROL32rCL);
BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
return true;
} else if (RegSize != 8) {
unsigned AReg, BReg;
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
AReg = SelectExpr(ShrVal);
BReg = SelectExpr(ShlVal);
} else {
BReg = SelectExpr(ShlVal);
AReg = SelectExpr(ShrVal);
}
unsigned ShAmt = SelectExpr(ShlAmt);
BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL;
BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
return true;
}
}
if (ConstantSDNode *ShrCst = dyn_cast<ConstantSDNode>(ShrAmt))
if (ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(ShlAmt))
if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize)
if (ShrCst->getValue() == RegSize-ShlCst->getValue()) {
// (A >> 5) | (A << 27) --> ROR A, 5
// (A >> 5) | (B << 27) --> SHRD A, B, 5
if (ShrVal == ShlVal) {
unsigned Reg = SelectExpr(ShrVal);
unsigned Opc = RegSize == 8 ? X86::ROR8ri :
(RegSize == 16 ? X86::ROR16ri : X86::ROR32ri);
BuildMI(BB, Opc, 2, DestReg).addReg(Reg).addImm(ShrCst->getValue());
return true;
} else if (RegSize != 8) {
unsigned AReg, BReg;
if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
AReg = SelectExpr(ShrVal);
BReg = SelectExpr(ShlVal);
} else {
BReg = SelectExpr(ShlVal);
AReg = SelectExpr(ShrVal);
}
unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8;
BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg)
.addImm(ShrCst->getValue());
return true;
}
}
return false;
}