From 4483df8b6398f44b9ba6f221b78c9f6b6138d468 Mon Sep 17 00:00:00 2001 From: Nate Begeman Date: Sun, 29 Aug 2004 08:19:32 +0000 Subject: [PATCH] Implement the following missing functionality in the PPC backend: cast fp->bool cast ulong->fp algebraic right shift long by non-constant value These changes tested across most of the test suite. Fixes Regression/casts llvm-svn: 16081 --- llvm/lib/Target/PowerPC/PPC32ISelSimple.cpp | 179 +++++++++++++++----- llvm/lib/Target/PowerPC/PowerPCInstrInfo.td | 1 + llvm/lib/Target/PowerPC/README.txt | 23 +-- 3 files changed, 137 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPC32ISelSimple.cpp b/llvm/lib/Target/PowerPC/PPC32ISelSimple.cpp index 814aaf31c750..923e8d57bc0d 100644 --- a/llvm/lib/Target/PowerPC/PPC32ISelSimple.cpp +++ b/llvm/lib/Target/PowerPC/PPC32ISelSimple.cpp @@ -2324,12 +2324,43 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB, BuildMI(*MBB, IP, PPC::SLW, 2, DestReg+1).addReg(SrcReg+1) .addReg(ShiftAmountReg); } else { - if (isSigned) { - // FIXME: Unimplemented - // Page C-3 of the PowerPC 32bit Programming Environments Manual - std::cerr << "ERROR: Unimplemented: signed right shift of long\n"; - abort(); - } else { + if (isSigned) { // shift right algebraic + MachineBasicBlock *TmpMBB =new MachineBasicBlock(BB->getBasicBlock()); + MachineBasicBlock *PhiMBB =new MachineBasicBlock(BB->getBasicBlock()); + MachineBasicBlock *OldMBB = BB; + ilist::iterator It = BB; ++It; + F->getBasicBlockList().insert(It, TmpMBB); + F->getBasicBlockList().insert(It, PhiMBB); + BB->addSuccessor(TmpMBB); + BB->addSuccessor(PhiMBB); + + BuildMI(*MBB, IP, PPC::SUBFIC, 2, TmpReg1).addReg(ShiftAmountReg) + .addSImm(32); + BuildMI(*MBB, IP, PPC::SRW, 2, TmpReg2).addReg(SrcReg+1) + .addReg(ShiftAmountReg); + BuildMI(*MBB, IP, PPC::SLW, 2, TmpReg3).addReg(SrcReg) + .addReg(TmpReg1); + BuildMI(*MBB, IP, PPC::OR, 2, TmpReg4).addReg(TmpReg2) + .addReg(TmpReg3); + BuildMI(*MBB, IP, PPC::ADDICo, 2, TmpReg5).addReg(ShiftAmountReg) + .addSImm(-32); + BuildMI(*MBB, IP, PPC::SRAW, 2, TmpReg6).addReg(SrcReg) + .addReg(TmpReg5); + BuildMI(*MBB, IP, PPC::SRAW, 2, DestReg).addReg(SrcReg) + .addReg(ShiftAmountReg); + BuildMI(*MBB, IP, PPC::BLE, 2).addReg(PPC::CR0).addMBB(PhiMBB); + + // OrMBB: + // Select correct least significant half if the shift amount > 32 + BB = TmpMBB; + unsigned OrReg = makeAnotherReg(Type::IntTy); + BuildMI(BB, PPC::OR, 2, OrReg).addReg(TmpReg6).addImm(TmpReg6); + TmpMBB->addSuccessor(PhiMBB); + + BB = PhiMBB; + BuildMI(BB, PPC::PHI, 4, DestReg+1).addReg(TmpReg4).addMBB(OldMBB) + .addReg(OrReg).addMBB(TmpMBB); + } else { // shift right logical BuildMI(*MBB, IP, PPC::SUBFIC, 2, TmpReg1).addReg(ShiftAmountReg) .addSImm(32); BuildMI(*MBB, IP, PPC::SRW, 2, TmpReg2).addReg(SrcReg+1) @@ -2654,9 +2685,12 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB, } case cFP32: case cFP64: - // FSEL perhaps? - std::cerr << "ERROR: Cast fp-to-bool not implemented!\n"; - abort(); + unsigned TmpReg = makeAnotherReg(Type::IntTy); + unsigned ConstZero = getReg(ConstantFP::get(Type::DoubleTy, 0.0), BB, IP); + BuildMI(*MBB, IP, PPC::FCMPU, PPC::CR7).addReg(SrcReg).addReg(ConstZero); + BuildMI(*MBB, IP, PPC::MFCR, TmpReg); + BuildMI(*MBB, IP, PPC::RLWINM, DestReg).addReg(TmpReg).addImm(31) + .addImm(31).addImm(31); } return; } @@ -2678,13 +2712,70 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB, // Emit a library call for long to float conversion if (SrcClass == cLong) { - std::vector Args; - Args.push_back(ValueRecord(SrcReg, SrcTy)); Function *floatFn = (DestClass == cFP32) ? __floatdisfFn : __floatdidfFn; - MachineInstr *TheCall = - BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(floatFn, true); - doCall(ValueRecord(DestReg, DestTy), TheCall, Args, false); - TM.CalledFunctions.insert(floatFn); + if (SrcTy->isSigned()) { + std::vector Args; + Args.push_back(ValueRecord(SrcReg, SrcTy)); + MachineInstr *TheCall = + BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(floatFn, true); + doCall(ValueRecord(DestReg, DestTy), TheCall, Args, false); + TM.CalledFunctions.insert(floatFn); + } else { + std::vector CmpArgs, ClrArgs, SetArgs; + unsigned ZeroLong = getReg(ConstantUInt::get(SrcTy, 0)); + unsigned CondReg = makeAnotherReg(Type::IntTy); + + // Update machine-CFG edges + MachineBasicBlock *ClrMBB = new MachineBasicBlock(BB->getBasicBlock()); + MachineBasicBlock *SetMBB = new MachineBasicBlock(BB->getBasicBlock()); + MachineBasicBlock *PhiMBB = new MachineBasicBlock(BB->getBasicBlock()); + MachineBasicBlock *OldMBB = BB; + ilist::iterator It = BB; ++It; + F->getBasicBlockList().insert(It, ClrMBB); + F->getBasicBlockList().insert(It, SetMBB); + F->getBasicBlockList().insert(It, PhiMBB); + BB->addSuccessor(ClrMBB); + BB->addSuccessor(SetMBB); + + CmpArgs.push_back(ValueRecord(SrcReg, SrcTy)); + CmpArgs.push_back(ValueRecord(ZeroLong, SrcTy)); + MachineInstr *TheCall = + BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(__cmpdi2Fn, true); + doCall(ValueRecord(CondReg, Type::IntTy), TheCall, CmpArgs, false); + TM.CalledFunctions.insert(__cmpdi2Fn); + BuildMI(*MBB, IP, PPC::CMPWI, 2, PPC::CR0).addReg(CondReg).addSImm(0); + BuildMI(*MBB, IP, PPC::BLE, 2).addReg(PPC::CR0).addMBB(SetMBB); + + // ClrMBB + BB = ClrMBB; + unsigned ClrReg = makeAnotherReg(DestTy); + ClrArgs.push_back(ValueRecord(SrcReg, SrcTy)); + TheCall = BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(floatFn, true); + doCall(ValueRecord(ClrReg, DestTy), TheCall, ClrArgs, false); + TM.CalledFunctions.insert(floatFn); + BuildMI(BB, PPC::B, 1).addMBB(PhiMBB); + BB->addSuccessor(PhiMBB); + + // SetMBB + BB = SetMBB; + unsigned SetReg = makeAnotherReg(DestTy); + unsigned CallReg = makeAnotherReg(DestTy); + unsigned ShiftedReg = makeAnotherReg(SrcTy); + ConstantSInt *Const1 = ConstantSInt::get(Type::IntTy, 1); + emitShiftOperation(BB, BB->end(), Src, Const1, false, SrcTy, ShiftedReg); + SetArgs.push_back(ValueRecord(ShiftedReg, SrcTy)); + TheCall = BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(floatFn, true); + doCall(ValueRecord(CallReg, DestTy), TheCall, SetArgs, false); + TM.CalledFunctions.insert(floatFn); + unsigned SetOpcode = (DestClass == cFP32) ? PPC::FADDS : PPC::FADD; + BuildMI(BB, SetOpcode, 2, SetReg).addReg(CallReg).addReg(CallReg); + BB->addSuccessor(PhiMBB); + + // PhiMBB + BB = PhiMBB; + BuildMI(BB, PPC::PHI, 4, DestReg).addReg(ClrReg).addMBB(ClrMBB) + .addReg(SetReg).addMBB(SetMBB); + } return; } @@ -2706,25 +2797,25 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB, if (!SrcTy->isSigned()) { ConstantFP *CFP = ConstantFP::get(Type::DoubleTy, 0x1.000000p52); unsigned ConstF = getReg(CFP, BB, IP); - BuildMI(*BB, IP, PPC::LIS, 1, constantHi).addSImm(0x4330); - addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantHi), + BuildMI(*MBB, IP, PPC::LIS, 1, constantHi).addSImm(0x4330); + addFrameReference(BuildMI(*MBB, IP, PPC::STW, 3).addReg(constantHi), ValueFrameIdx); - addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(SrcReg), + addFrameReference(BuildMI(*MBB, IP, PPC::STW, 3).addReg(SrcReg), ValueFrameIdx, 4); - addFrameReference(BuildMI(*BB, IP, PPC::LFD, 2, TempF), ValueFrameIdx); - BuildMI(*BB, IP, PPC::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF); + addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TempF), ValueFrameIdx); + BuildMI(*MBB, IP, PPC::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF); } else { ConstantFP *CFP = ConstantFP::get(Type::DoubleTy, 0x1.000008p52); unsigned ConstF = getReg(CFP, BB, IP); unsigned TempLo = makeAnotherReg(Type::IntTy); - BuildMI(*BB, IP, PPC::LIS, 1, constantHi).addSImm(0x4330); - addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(constantHi), + BuildMI(*MBB, IP, PPC::LIS, 1, constantHi).addSImm(0x4330); + addFrameReference(BuildMI(*MBB, IP, PPC::STW, 3).addReg(constantHi), ValueFrameIdx); - BuildMI(*BB, IP, PPC::XORIS, 2, TempLo).addReg(SrcReg).addImm(0x8000); - addFrameReference(BuildMI(*BB, IP, PPC::STW, 3).addReg(TempLo), + BuildMI(*MBB, IP, PPC::XORIS, 2, TempLo).addReg(SrcReg).addImm(0x8000); + addFrameReference(BuildMI(*MBB, IP, PPC::STW, 3).addReg(TempLo), ValueFrameIdx, 4); - addFrameReference(BuildMI(*BB, IP, PPC::LFD, 2, TempF), ValueFrameIdx); - BuildMI(*BB, IP, PPC::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF); + addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TempF), ValueFrameIdx); + BuildMI(*MBB, IP, PPC::FSUB, 2, DestReg).addReg(TempF).addReg(ConstF); } return; } @@ -2754,8 +2845,8 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB, unsigned TempReg = makeAnotherReg(Type::DoubleTy); // Convert to integer in the FP reg and store it to a stack slot - BuildMI(*BB, IP, PPC::FCTIWZ, 1, TempReg).addReg(SrcReg); - addFrameReference(BuildMI(*BB, IP, PPC::STFD, 3) + BuildMI(*MBB, IP, PPC::FCTIWZ, 1, TempReg).addReg(SrcReg); + addFrameReference(BuildMI(*MBB, IP, PPC::STFD, 3) .addReg(TempReg), ValueFrameIdx); // There is no load signed byte opcode, so we must emit a sign extend for @@ -2763,13 +2854,13 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB, // correct offset. if (DestClass == cByte) { unsigned TempReg2 = makeAnotherReg(DestTy); - addFrameReference(BuildMI(*BB, IP, PPC::LBZ, 2, TempReg2), + addFrameReference(BuildMI(*MBB, IP, PPC::LBZ, 2, TempReg2), ValueFrameIdx, 7); - BuildMI(*BB, IP, PPC::EXTSB, 1, DestReg).addReg(TempReg2); + BuildMI(*MBB, IP, PPC::EXTSB, 1, DestReg).addReg(TempReg2); } else { int offset = (DestClass == cShort) ? 6 : 4; unsigned LoadOp = (DestClass == cShort) ? PPC::LHA : PPC::LWZ; - addFrameReference(BuildMI(*BB, IP, LoadOp, 2, DestReg), + addFrameReference(BuildMI(*MBB, IP, LoadOp, 2, DestReg), ValueFrameIdx, offset); } } else { @@ -2800,34 +2891,34 @@ void ISel::emitCastOperation(MachineBasicBlock *MBB, // Convert from floating point to unsigned 32-bit value // Use 0 if incoming value is < 0.0 - BuildMI(*BB, IP, PPC::FSEL, 3, UseZero).addReg(SrcReg).addReg(SrcReg) + BuildMI(*MBB, IP, PPC::FSEL, 3, UseZero).addReg(SrcReg).addReg(SrcReg) .addReg(Zero); // Use 2**32 - 1 if incoming value is >= 2**32 - BuildMI(*BB, IP, PPC::FSUB, 2, UseMaxInt).addReg(MaxInt).addReg(SrcReg); - BuildMI(*BB, IP, PPC::FSEL, 3, UseChoice).addReg(UseMaxInt) + BuildMI(*MBB, IP, PPC::FSUB, 2, UseMaxInt).addReg(MaxInt).addReg(SrcReg); + BuildMI(*MBB, IP, PPC::FSEL, 3, UseChoice).addReg(UseMaxInt) .addReg(UseZero).addReg(MaxInt); // Subtract 2**31 - BuildMI(*BB, IP, PPC::FSUB, 2, TmpReg).addReg(UseChoice).addReg(Border); + BuildMI(*MBB, IP, PPC::FSUB, 2, TmpReg).addReg(UseChoice).addReg(Border); // Use difference if >= 2**31 - BuildMI(*BB, IP, PPC::FCMPU, 2, PPC::CR0).addReg(UseChoice) + BuildMI(*MBB, IP, PPC::FCMPU, 2, PPC::CR0).addReg(UseChoice) .addReg(Border); - BuildMI(*BB, IP, PPC::FSEL, 3, TmpReg2).addReg(TmpReg).addReg(TmpReg) + BuildMI(*MBB, IP, PPC::FSEL, 3, TmpReg2).addReg(TmpReg).addReg(TmpReg) .addReg(UseChoice); // Convert to integer - BuildMI(*BB, IP, PPC::FCTIWZ, 1, ConvReg).addReg(TmpReg2); - addFrameReference(BuildMI(*BB, IP, PPC::STFD, 3).addReg(ConvReg), + BuildMI(*MBB, IP, PPC::FCTIWZ, 1, ConvReg).addReg(TmpReg2); + addFrameReference(BuildMI(*MBB, IP, PPC::STFD, 3).addReg(ConvReg), FrameIdx); if (DestClass == cByte) { - addFrameReference(BuildMI(*BB, IP, PPC::LBZ, 2, DestReg), + addFrameReference(BuildMI(*MBB, IP, PPC::LBZ, 2, DestReg), FrameIdx, 7); } else if (DestClass == cShort) { - addFrameReference(BuildMI(*BB, IP, PPC::LHZ, 2, DestReg), + addFrameReference(BuildMI(*MBB, IP, PPC::LHZ, 2, DestReg), FrameIdx, 6); } if (DestClass == cInt) { - addFrameReference(BuildMI(*BB, IP, PPC::LWZ, 2, IntTmp), + addFrameReference(BuildMI(*MBB, IP, PPC::LWZ, 2, IntTmp), FrameIdx, 4); - BuildMI(*BB, IP, PPC::BLT, 2).addReg(PPC::CR0).addMBB(PhiMBB); - BuildMI(*BB, IP, PPC::B, 1).addMBB(XorMBB); + BuildMI(*MBB, IP, PPC::BLT, 2).addReg(PPC::CR0).addMBB(PhiMBB); + BuildMI(*MBB, IP, PPC::B, 1).addMBB(XorMBB); // XorMBB: // add 2**31 if input was >= 2**31 diff --git a/llvm/lib/Target/PowerPC/PowerPCInstrInfo.td b/llvm/lib/Target/PowerPC/PowerPCInstrInfo.td index 6ef0bdd6ed7b..8629824ab2d0 100644 --- a/llvm/lib/Target/PowerPC/PowerPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PowerPCInstrInfo.td @@ -47,6 +47,7 @@ def SUBI : DForm_2<"subi", 14, 0, 0>; def LI : DForm_2_r0<"li", 14, 0, 0>; def LIS : DForm_2_r0<"lis", 15, 0, 0>; def ADDIC : DForm_2<"addic", 12, 0, 0>; +def ADDICo : DForm_2<"addic.", 13, 0, 0>; def ADD : XOForm_1<"add", 31, 266, 0, 0, 0, 0>; def ADDC : XOForm_1<"addc", 31, 10, 0, 0, 0, 0>; def ADDE : XOForm_1<"adde", 31, 138, 0, 0, 0, 0>; diff --git a/llvm/lib/Target/PowerPC/README.txt b/llvm/lib/Target/PowerPC/README.txt index ab383df8614a..222780b87c6c 100644 --- a/llvm/lib/Target/PowerPC/README.txt +++ b/llvm/lib/Target/PowerPC/README.txt @@ -1,28 +1,10 @@ TODO: +* switch to auto-generated asm writer * use stfiwx in float->int -* implement cast fp to bool -* implement algebraic shift right long by reg * implement scheduling info * implement powerpc-64 for darwin * implement powerpc-64 for aix * fix rlwimi generation to be use-and-def -* fix ulong to double: - floatdidf assumes signed longs. so if the high but of a ulong - just happens to be set, you get the wrong sign. The fix for this - is to call cmpdi2 to compare against zero, if so shift right by one, - convert to fp, and multiply by (add to itself). the sequence would - look like: - {r3:r4} holds ulong a; - li r5, 0 - li r6, 0 (set r5:r6 to ulong 0) - call cmpdi2 ==> sets r3 <, =, > 0 - if r3 > 0 - call floatdidf as usual - else - shift right ulong a, 1 (we could use emitShift) - call floatdidf - fadd f1, f1, f1 (fp left shift by 1) -* cast elimination pass (uint -> sbyte -> short, kill the byte -> short) * should hint to the branch select pass that it doesn't need to print the second unconditional branch, so we don't end up with things like: b .LBBl42__2E_expand_function_8_674 ; loopentry.24 @@ -30,9 +12,6 @@ TODO: b .LBBl42__2E_expand_function_8_42 ; NewDefault Currently failing tests that should pass: -* SingleSource - `- Regression - | `- casts (ulong to fp failure) * MultiSource |- Applications | `- hbd: miscompilation