From de87d146ab8d76c5cd3932cc7288e88e8e278a4d Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 19 Jan 2005 08:07:05 +0000 Subject: [PATCH] Implement Regression/CodeGen/X86/rotate.ll: emit rotate instructions (which typically cost 1 cycle) instead of shld/shrd instructions (which are typically 6 or more cycles). This also saves code space. For example, instead of emitting: rotr: mov %EAX, DWORD PTR [%ESP + 4] mov %CL, BYTE PTR [%ESP + 8] shrd %EAX, %EAX, %CL ret rotli: mov %EAX, DWORD PTR [%ESP + 4] shrd %EAX, %EAX, 27 ret Emit: rotr32: mov %CL, BYTE PTR [%ESP + 8] mov %EAX, DWORD PTR [%ESP + 4] ror %EAX, %CL ret rotli32: mov %EAX, DWORD PTR [%ESP + 4] ror %EAX, 27 ret We also emit byte rotate instructions which do not have a sh[lr]d counterpart at all. llvm-svn: 19692 --- llvm/lib/Target/X86/X86ISelPattern.cpp | 127 ++++++++++++++++--------- 1 file changed, 84 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelPattern.cpp b/llvm/lib/Target/X86/X86ISelPattern.cpp index 3012dfd8d16e..574cbf7654c4 100644 --- a/llvm/lib/Target/X86/X86ISelPattern.cpp +++ b/llvm/lib/Target/X86/X86ISelPattern.cpp @@ -1165,47 +1165,24 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) { // Find out if ShrAmt = 32-ShlAmt or ShlAmt = 32-ShrAmt. if (ShlAmt.getOpcode() == ISD::SUB && ShlAmt.getOperand(1) == ShrAmt) if (ConstantSDNode *SubCST = dyn_cast(ShlAmt.getOperand(0))) - if (SubCST->getValue() == RegSize && RegSize != 8) { + if (SubCST->getValue() == RegSize) { + // (A >> ShrAmt) | (A << (32-ShrAmt)) ==> ROR A, ShrAmt // (A >> ShrAmt) | (B << (32-ShrAmt)) ==> SHRD A, B, ShrAmt - unsigned AReg, BReg; - if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) { - AReg = SelectExpr(ShrVal); - BReg = SelectExpr(ShlVal); - } else { - BReg = SelectExpr(ShlVal); - AReg = SelectExpr(ShrVal); - } - unsigned ShAmt = SelectExpr(ShrAmt); - BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt); - unsigned Opc = RegSize == 16 ? 
X86::SHRD16rrCL : X86::SHRD32rrCL; - BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg); - return true; - } - - if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt) - if (ConstantSDNode *SubCST = dyn_cast(ShrAmt.getOperand(0))) - if (SubCST->getValue() == RegSize && RegSize != 8) { - // (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt - unsigned AReg, BReg; - if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) { - AReg = SelectExpr(ShrVal); - BReg = SelectExpr(ShlVal); - } else { - BReg = SelectExpr(ShlVal); - AReg = SelectExpr(ShrVal); - } - unsigned ShAmt = SelectExpr(ShlAmt); - BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt); - unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL; - BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg); - return true; - } - - if (ConstantSDNode *ShrCst = dyn_cast(ShrAmt)) - if (ConstantSDNode *ShlCst = dyn_cast(ShlAmt)) - if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize) { - if (ShrCst->getValue() == RegSize-ShlCst->getValue() && RegSize != 8) { - // (A >> 5) | (B << 27) --> SHRD A, B, 5 + if (ShrVal == ShlVal) { + unsigned Reg, ShAmt; + if (getRegPressure(ShrVal) > getRegPressure(ShrAmt)) { + Reg = SelectExpr(ShrVal); + ShAmt = SelectExpr(ShrAmt); + } else { + ShAmt = SelectExpr(ShrAmt); + Reg = SelectExpr(ShrVal); + } + BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt); + unsigned Opc = RegSize == 8 ? X86::ROR8rCL : + (RegSize == 16 ? X86::ROR16rCL : X86::ROR32rCL); + BuildMI(BB, Opc, 1, DestReg).addReg(Reg); + return true; + } else if (RegSize != 8) { unsigned AReg, BReg; if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) { AReg = SelectExpr(ShrVal); @@ -1214,14 +1191,78 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) { BReg = SelectExpr(ShlVal); AReg = SelectExpr(ShrVal); } - unsigned Opc = RegSize == 16 ? 
X86::SHRD16rri8 : X86::SHRD32rri8; - BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg) - .addImm(ShrCst->getValue()); + unsigned ShAmt = SelectExpr(ShrAmt); + BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt); + unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL; + BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg); return true; } } + if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt) + if (ConstantSDNode *SubCST = dyn_cast(ShrAmt.getOperand(0))) + if (SubCST->getValue() == RegSize) { + // (A << ShlAmt) | (A >> (32-ShlAmt)) ==> ROL A, ShlAmt + // (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShlAmt + if (ShrVal == ShlVal) { + unsigned Reg, ShAmt; + if (getRegPressure(ShrVal) > getRegPressure(ShlAmt)) { + Reg = SelectExpr(ShrVal); + ShAmt = SelectExpr(ShlAmt); + } else { + ShAmt = SelectExpr(ShlAmt); + Reg = SelectExpr(ShrVal); + } + BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt); + unsigned Opc = RegSize == 8 ? X86::ROL8rCL : + (RegSize == 16 ? X86::ROL16rCL : X86::ROL32rCL); + BuildMI(BB, Opc, 1, DestReg).addReg(Reg); + return true; + } else if (RegSize != 8) { + unsigned AReg, BReg; + if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) { + AReg = SelectExpr(ShrVal); + BReg = SelectExpr(ShlVal); + } else { + BReg = SelectExpr(ShlVal); + AReg = SelectExpr(ShrVal); + } + unsigned ShAmt = SelectExpr(ShlAmt); + BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt); + unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL; + BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg); + return true; + } + } + if (ConstantSDNode *ShrCst = dyn_cast(ShrAmt)) + if (ConstantSDNode *ShlCst = dyn_cast(ShlAmt)) + if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize) + if (ShrCst->getValue() == RegSize-ShlCst->getValue()) { + // (A >> 5) | (A << 27) --> ROR A, 5 + // (A >> 5) | (B << 27) --> SHRD A, B, 5 + if (ShrVal == ShlVal) { + unsigned Reg = SelectExpr(ShrVal); + unsigned Opc = RegSize == 8 ? 
X86::ROR8ri : + (RegSize == 16 ? X86::ROR16ri : X86::ROR32ri); + BuildMI(BB, Opc, 2, DestReg).addReg(Reg).addImm(ShrCst->getValue()); + return true; + } else if (RegSize != 8) { + unsigned AReg, BReg; + if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) { + AReg = SelectExpr(ShrVal); + BReg = SelectExpr(ShlVal); + } else { + BReg = SelectExpr(ShlVal); + AReg = SelectExpr(ShrVal); + } + unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8; + BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg) + .addImm(ShrCst->getValue()); + return true; + } + } + return false; }