forked from OSchip/llvm-project
Teach the spiller to unfold instructions which mod/ref a spill slot when a scratch
register is available and when it's profitable. e.g.
    xorq %r12<kill>, %r13
    addq %rax, -184(%rbp)
    addq %r13, -184(%rbp)
==>
    xorq %r12<kill>, %r13
    movq -184(%rbp), %r12
    addq %rax, %r12
    addq %r13, %r12
    movq %r12, -184(%rbp)
Two more instructions, but fewer memory accesses. It can also open up opportunities for more optimizations. llvm-svn: 69341
This commit is contained in:
parent
c3e7cff6d3
commit
b96a1082a9
|
@ -29,6 +29,7 @@ STATISTIC(NumLoads , "Number of loads added");
|
|||
STATISTIC(NumReused , "Number of values reused");
|
||||
STATISTIC(NumDCE , "Number of copies elided");
|
||||
STATISTIC(NumSUnfold , "Number of stores unfolded");
|
||||
STATISTIC(NumModRefUnfold, "Number of modref unfolded");
|
||||
|
||||
namespace {
|
||||
enum SpillerName { simple, local };
|
||||
|
@ -524,6 +525,7 @@ bool LocalSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
|
|||
RegInfo = &MF.getRegInfo();
|
||||
TRI = MF.getTarget().getRegisterInfo();
|
||||
TII = MF.getTarget().getInstrInfo();
|
||||
AllocatableRegs = TRI->getAllocatableSet(MF);
|
||||
DOUT << "\n**** Local spiller rewriting function '"
|
||||
<< MF.getFunction()->getName() << "':\n";
|
||||
DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
|
||||
|
@ -595,7 +597,201 @@ bool LocalSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
|
|||
}
|
||||
|
||||
|
||||
/// PrepForUnfoldOpti - Turn a store folding instruction into a load folding
|
||||
/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
|
||||
/// stack slot mod/ref. It also checks if it's possible to unfold the
|
||||
/// instruction by having it define a specified physical register instead.
|
||||
static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
|
||||
const TargetInstrInfo *TII,
|
||||
const TargetRegisterInfo *TRI,
|
||||
VirtRegMap &VRM) {
|
||||
if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
|
||||
return false;
|
||||
|
||||
bool Found = false;
|
||||
VirtRegMap::MI2VirtMapTy::const_iterator I, End;
|
||||
for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
|
||||
unsigned VirtReg = I->second.first;
|
||||
VirtRegMap::ModRef MR = I->second.second;
|
||||
if (MR & VirtRegMap::isModRef)
|
||||
if (VRM.getStackSlot(VirtReg) == SS) {
|
||||
Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!Found)
|
||||
return false;
|
||||
|
||||
// Does the instruction uses a register that overlaps the scratch register?
|
||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = MI.getOperand(i);
|
||||
if (!MO.isReg() || MO.getReg() == 0)
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
|
||||
if (!VRM.hasPhys(Reg))
|
||||
continue;
|
||||
Reg = VRM.getPhys(Reg);
|
||||
}
|
||||
if (TRI->regsOverlap(PhysReg, Reg))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// FindFreeRegister - Find a free register of a given register class by looking
|
||||
/// at (at most) the last two machine instructions.
|
||||
static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
|
||||
MachineBasicBlock &MBB,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI,
|
||||
BitVector &AllocatableRegs) {
|
||||
BitVector Defs(TRI->getNumRegs());
|
||||
BitVector Uses(TRI->getNumRegs());
|
||||
SmallVector<unsigned, 4> LocalUses;
|
||||
SmallVector<unsigned, 4> Kills;
|
||||
|
||||
// Take a look at 2 instructions at most.
|
||||
for (unsigned Count = 0; Count < 2; ++Count) {
|
||||
if (MII == MBB.begin())
|
||||
break;
|
||||
MachineInstr *PrevMI = prior(MII);
|
||||
for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = PrevMI->getOperand(i);
|
||||
if (!MO.isReg() || MO.getReg() == 0)
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
if (MO.isDef()) {
|
||||
Defs.set(Reg);
|
||||
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
|
||||
Defs.set(*AS);
|
||||
} else {
|
||||
LocalUses.push_back(Reg);
|
||||
if (MO.isKill() && AllocatableRegs[Reg])
|
||||
Kills.push_back(Reg);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
|
||||
unsigned Kill = Kills[i];
|
||||
if (!Defs[Kill] && !Uses[Kill] &&
|
||||
TRI->getPhysicalRegisterRegClass(Kill) == RC)
|
||||
return Kill;
|
||||
}
|
||||
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
|
||||
unsigned Reg = LocalUses[i];
|
||||
Uses.set(Reg);
|
||||
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
|
||||
Uses.set(*AS);
|
||||
}
|
||||
|
||||
MII = PrevMI;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) {
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = MI->getOperand(i);
|
||||
if (MO.isReg() && MO.getReg() == VirtReg)
|
||||
MO.setReg(PhysReg);
|
||||
}
|
||||
}
|
||||
|
||||
/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
|
||||
/// a scratch register is available.
|
||||
/// xorq %r12<kill>, %r13
|
||||
/// addq %rax, -184(%rbp)
|
||||
/// addq %r13, -184(%rbp)
|
||||
/// ==>
|
||||
/// xorq %r12<kill>, %r13
|
||||
/// movq -184(%rbp), %r12
|
||||
/// addq %rax, %r12
|
||||
/// addq %r13, %r12
|
||||
/// movq %r12, -184(%rbp)
|
||||
bool LocalSpiller::OptimizeByUnfold2(unsigned VirtReg, int SS,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MII,
|
||||
std::vector<MachineInstr*> &MaybeDeadStores,
|
||||
AvailableSpills &Spills,
|
||||
BitVector &RegKills,
|
||||
std::vector<MachineOperand*> &KillOps,
|
||||
VirtRegMap &VRM) {
|
||||
MachineBasicBlock::iterator NextMII = next(MII);
|
||||
if (NextMII == MBB.end())
|
||||
return false;
|
||||
|
||||
if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
|
||||
return false;
|
||||
|
||||
// Now let's see if the last couple of instructions happens to have freed up
|
||||
// a register.
|
||||
const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
|
||||
unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
|
||||
if (!PhysReg)
|
||||
return false;
|
||||
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
TRI = MF.getTarget().getRegisterInfo();
|
||||
MachineInstr &MI = *MII;
|
||||
if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
|
||||
return false;
|
||||
|
||||
// If the next instruction also folds the same SS modref and can be unfoled,
|
||||
// then it's worthwhile to issue a load from SS into the free register and
|
||||
// then unfold these instructions.
|
||||
if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
|
||||
return false;
|
||||
|
||||
// Load from SS to the spare physical register.
|
||||
TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
|
||||
// This invalidates Phys.
|
||||
Spills.ClobberPhysReg(PhysReg);
|
||||
// Remember it's available.
|
||||
Spills.addAvailable(SS, PhysReg);
|
||||
MaybeDeadStores[SS] = NULL;
|
||||
|
||||
// Unfold current MI.
|
||||
SmallVector<MachineInstr*, 4> NewMIs;
|
||||
if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
|
||||
assert(0 && "Unable unfold the load / store folding instruction!");
|
||||
assert(NewMIs.size() == 1);
|
||||
AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
|
||||
VRM.transferRestorePts(&MI, NewMIs[0]);
|
||||
MII = MBB.insert(MII, NewMIs[0]);
|
||||
InvalidateKills(MI, RegKills, KillOps);
|
||||
VRM.RemoveMachineInstrFromMaps(&MI);
|
||||
MBB.erase(&MI);
|
||||
++NumModRefUnfold;
|
||||
|
||||
// Unfold next instructions that fold the same SS.
|
||||
do {
|
||||
MachineInstr &NextMI = *NextMII;
|
||||
NextMII = next(NextMII);
|
||||
NewMIs.clear();
|
||||
if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
|
||||
assert(0 && "Unable unfold the load / store folding instruction!");
|
||||
assert(NewMIs.size() == 1);
|
||||
AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
|
||||
VRM.transferRestorePts(&NextMI, NewMIs[0]);
|
||||
MBB.insert(NextMII, NewMIs[0]);
|
||||
InvalidateKills(NextMI, RegKills, KillOps);
|
||||
VRM.RemoveMachineInstrFromMaps(&NextMI);
|
||||
MBB.erase(&NextMI);
|
||||
++NumModRefUnfold;
|
||||
} while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
|
||||
|
||||
// Store the value back into SS.
|
||||
TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
|
||||
MachineInstr *StoreMI = prior(NextMII);
|
||||
VRM.addSpillSlotUse(SS, StoreMI);
|
||||
VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// OptimizeByUnfold - Turn a store folding instruction into a load folding
|
||||
/// instruction. e.g.
|
||||
/// xorl %edi, %eax
|
||||
/// movl %eax, -32(%ebp)
|
||||
|
@ -607,7 +803,7 @@ bool LocalSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
|
|||
/// mov %eax, -32(%ebp)
|
||||
/// This enables unfolding optimization for a subsequent instruction which will
|
||||
/// also eliminate the newly introduced store instruction.
|
||||
bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
|
||||
bool LocalSpiller::OptimizeByUnfold(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MII,
|
||||
std::vector<MachineInstr*> &MaybeDeadStores,
|
||||
AvailableSpills &Spills,
|
||||
|
@ -646,8 +842,14 @@ bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
|
|||
}
|
||||
}
|
||||
|
||||
if (!UnfoldedOpc)
|
||||
return false;
|
||||
if (!UnfoldedOpc) {
|
||||
if (!UnfoldVR)
|
||||
return false;
|
||||
|
||||
// Look for other unfolding opportunities.
|
||||
return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII,
|
||||
MaybeDeadStores, Spills, RegKills, KillOps, VRM);
|
||||
}
|
||||
|
||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = MI.getOperand(i);
|
||||
|
@ -705,6 +907,7 @@ bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
|
|||
MF.DeleteMachineInstr(NewMI);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -770,7 +973,7 @@ bool LocalSpiller::CommuteToFoldReload(MachineBasicBlock &MBB,
|
|||
VRM.addSpillSlotUse(SS, FoldedMI);
|
||||
VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
|
||||
// Insert new def MI and spill MI.
|
||||
const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
|
||||
const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
|
||||
TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
|
||||
MII = prior(MII);
|
||||
MachineInstr *StoreMI = MII;
|
||||
|
@ -935,13 +1138,13 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
|
|||
DistanceMap.clear();
|
||||
for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
|
||||
MII != E; ) {
|
||||
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
|
||||
MachineBasicBlock::iterator NextMII = next(MII);
|
||||
|
||||
VirtRegMap::MI2VirtMapTy::const_iterator I, End;
|
||||
bool Erased = false;
|
||||
bool BackTracked = false;
|
||||
if (PrepForUnfoldOpti(MBB, MII,
|
||||
MaybeDeadStores, Spills, RegKills, KillOps, VRM))
|
||||
if (OptimizeByUnfold(MBB, MII,
|
||||
MaybeDeadStores, Spills, RegKills, KillOps, VRM))
|
||||
NextMII = next(MII);
|
||||
|
||||
MachineInstr &MI = *MII;
|
||||
|
|
|
@ -97,7 +97,7 @@ namespace llvm {
|
|||
const TargetRegisterInfo *getRegInfo() const { return TRI; }
|
||||
|
||||
/// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
|
||||
/// available in a physical register, return that PhysReg, otherwise
|
||||
/// available in a physical register, return that PhysReg, otherwise
|
||||
/// return 0.
|
||||
unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
|
||||
std::map<int, unsigned>::const_iterator I =
|
||||
|
@ -284,6 +284,7 @@ namespace llvm {
|
|||
MachineRegisterInfo *RegInfo;
|
||||
const TargetRegisterInfo *TRI;
|
||||
const TargetInstrInfo *TII;
|
||||
BitVector AllocatableRegs;
|
||||
DenseMap<MachineInstr*, unsigned> DistanceMap;
|
||||
public:
|
||||
bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM);
|
||||
|
@ -291,12 +292,22 @@ namespace llvm {
|
|||
void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
|
||||
unsigned Reg, BitVector &RegKills,
|
||||
std::vector<MachineOperand*> &KillOps);
|
||||
bool PrepForUnfoldOpti(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MII,
|
||||
|
||||
bool OptimizeByUnfold(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MII,
|
||||
std::vector<MachineInstr*> &MaybeDeadStores,
|
||||
AvailableSpills &Spills, BitVector &RegKills,
|
||||
std::vector<MachineOperand*> &KillOps,
|
||||
VirtRegMap &VRM);
|
||||
|
||||
bool OptimizeByUnfold2(unsigned VirtReg, int SS,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MII,
|
||||
std::vector<MachineInstr*> &MaybeDeadStores,
|
||||
AvailableSpills &Spills, BitVector &RegKills,
|
||||
std::vector<MachineOperand*> &KillOps,
|
||||
VirtRegMap &VRM);
|
||||
|
||||
bool CommuteToFoldReload(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MII,
|
||||
unsigned VirtReg, unsigned SrcReg, int SS,
|
||||
|
@ -305,6 +316,7 @@ namespace llvm {
|
|||
std::vector<MachineOperand*> &KillOps,
|
||||
const TargetRegisterInfo *TRI,
|
||||
VirtRegMap &VRM);
|
||||
|
||||
void SpillRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MII,
|
||||
int Idx, unsigned PhysReg, int StackSlot,
|
||||
|
@ -315,6 +327,7 @@ namespace llvm {
|
|||
BitVector &RegKills,
|
||||
std::vector<MachineOperand*> &KillOps,
|
||||
VirtRegMap &VRM);
|
||||
|
||||
void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
|
||||
AvailableSpills &Spills,
|
||||
BitVector &RegKills, std::vector<MachineOperand*> &KillOps);
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
|
||||
|
||||
%struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 }
|
||||
%struct.anon = type { [16 x i64] }
|
||||
@K512 = external constant [80 x i64], align 32 ; <[80 x i64]*> [#uses=2]
|
||||
|
||||
define fastcc void @sha512_block_data_order(%struct.SHA512_CTX* nocapture %ctx, i8* nocapture %in, i64 %num) nounwind ssp {
|
||||
entry:
|
||||
br label %bb349
|
||||
|
||||
bb349: ; preds = %bb349, %entry
|
||||
%e.0489 = phi i64 [ 0, %entry ], [ %e.0, %bb349 ] ; <i64> [#uses=3]
|
||||
%b.0472 = phi i64 [ 0, %entry ], [ %87, %bb349 ] ; <i64> [#uses=2]
|
||||
%asmtmp356 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %e.0489) nounwind ; <i64> [#uses=1]
|
||||
%0 = xor i64 0, %asmtmp356 ; <i64> [#uses=1]
|
||||
%1 = add i64 0, %0 ; <i64> [#uses=1]
|
||||
%2 = add i64 %1, 0 ; <i64> [#uses=1]
|
||||
%3 = add i64 %2, 0 ; <i64> [#uses=1]
|
||||
%4 = add i64 %3, 0 ; <i64> [#uses=5]
|
||||
%asmtmp372 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %4) nounwind ; <i64> [#uses=1]
|
||||
%asmtmp373 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %4) nounwind ; <i64> [#uses=0]
|
||||
%5 = xor i64 %asmtmp372, 0 ; <i64> [#uses=0]
|
||||
%6 = xor i64 0, %b.0472 ; <i64> [#uses=1]
|
||||
%7 = and i64 %4, %6 ; <i64> [#uses=1]
|
||||
%8 = xor i64 %7, 0 ; <i64> [#uses=1]
|
||||
%9 = add i64 0, %8 ; <i64> [#uses=1]
|
||||
%10 = add i64 %9, 0 ; <i64> [#uses=2]
|
||||
%asmtmp377 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 0) nounwind ; <i64> [#uses=1]
|
||||
%11 = xor i64 0, %asmtmp377 ; <i64> [#uses=1]
|
||||
%12 = add i64 0, %11 ; <i64> [#uses=1]
|
||||
%13 = add i64 %12, 0 ; <i64> [#uses=1]
|
||||
%not381 = xor i64 0, -1 ; <i64> [#uses=1]
|
||||
%14 = and i64 %e.0489, %not381 ; <i64> [#uses=1]
|
||||
%15 = xor i64 0, %14 ; <i64> [#uses=1]
|
||||
%16 = add i64 %15, 0 ; <i64> [#uses=1]
|
||||
%17 = add i64 %16, %13 ; <i64> [#uses=1]
|
||||
%18 = add i64 %17, 0 ; <i64> [#uses=1]
|
||||
%19 = add i64 %18, 0 ; <i64> [#uses=2]
|
||||
%20 = add i64 %19, %b.0472 ; <i64> [#uses=3]
|
||||
%21 = add i64 %19, 0 ; <i64> [#uses=1]
|
||||
%22 = add i64 %21, 0 ; <i64> [#uses=1]
|
||||
%23 = add i32 0, 12 ; <i32> [#uses=1]
|
||||
%24 = and i32 %23, 12 ; <i32> [#uses=1]
|
||||
%25 = zext i32 %24 to i64 ; <i64> [#uses=1]
|
||||
%26 = getelementptr [16 x i64]* null, i64 0, i64 %25 ; <i64*> [#uses=0]
|
||||
%27 = add i64 0, %e.0489 ; <i64> [#uses=1]
|
||||
%28 = add i64 %27, 0 ; <i64> [#uses=1]
|
||||
%29 = add i64 %28, 0 ; <i64> [#uses=1]
|
||||
%30 = add i64 %29, 0 ; <i64> [#uses=2]
|
||||
%31 = and i64 %10, %4 ; <i64> [#uses=1]
|
||||
%32 = xor i64 0, %31 ; <i64> [#uses=1]
|
||||
%33 = add i64 %30, 0 ; <i64> [#uses=3]
|
||||
%34 = add i64 %30, %32 ; <i64> [#uses=1]
|
||||
%35 = add i64 %34, 0 ; <i64> [#uses=1]
|
||||
%36 = and i64 %33, %20 ; <i64> [#uses=1]
|
||||
%37 = xor i64 %36, 0 ; <i64> [#uses=1]
|
||||
%38 = add i64 %37, 0 ; <i64> [#uses=1]
|
||||
%39 = add i64 %38, 0 ; <i64> [#uses=1]
|
||||
%40 = add i64 %39, 0 ; <i64> [#uses=1]
|
||||
%41 = add i64 %40, 0 ; <i64> [#uses=1]
|
||||
%42 = add i64 %41, %4 ; <i64> [#uses=3]
|
||||
%43 = or i32 0, 6 ; <i32> [#uses=1]
|
||||
%44 = and i32 %43, 14 ; <i32> [#uses=1]
|
||||
%45 = zext i32 %44 to i64 ; <i64> [#uses=1]
|
||||
%46 = getelementptr [16 x i64]* null, i64 0, i64 %45 ; <i64*> [#uses=1]
|
||||
%not417 = xor i64 %42, -1 ; <i64> [#uses=1]
|
||||
%47 = and i64 %20, %not417 ; <i64> [#uses=1]
|
||||
%48 = xor i64 0, %47 ; <i64> [#uses=1]
|
||||
%49 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
|
||||
%50 = load i64* %49, align 8 ; <i64> [#uses=1]
|
||||
%51 = add i64 %48, 0 ; <i64> [#uses=1]
|
||||
%52 = add i64 %51, 0 ; <i64> [#uses=1]
|
||||
%53 = add i64 %52, 0 ; <i64> [#uses=1]
|
||||
%54 = add i64 %53, %50 ; <i64> [#uses=2]
|
||||
%asmtmp420 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 0) nounwind ; <i64> [#uses=1]
|
||||
%asmtmp421 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 0) nounwind ; <i64> [#uses=1]
|
||||
%55 = xor i64 %asmtmp420, 0 ; <i64> [#uses=1]
|
||||
%56 = xor i64 %55, %asmtmp421 ; <i64> [#uses=1]
|
||||
%57 = add i64 %54, %10 ; <i64> [#uses=5]
|
||||
%58 = add i64 %54, 0 ; <i64> [#uses=1]
|
||||
%59 = add i64 %58, %56 ; <i64> [#uses=2]
|
||||
%60 = or i32 0, 7 ; <i32> [#uses=1]
|
||||
%61 = and i32 %60, 15 ; <i32> [#uses=1]
|
||||
%62 = zext i32 %61 to i64 ; <i64> [#uses=1]
|
||||
%63 = getelementptr [16 x i64]* null, i64 0, i64 %62 ; <i64*> [#uses=2]
|
||||
%64 = load i64* null, align 8 ; <i64> [#uses=1]
|
||||
%65 = lshr i64 %64, 6 ; <i64> [#uses=1]
|
||||
%66 = xor i64 0, %65 ; <i64> [#uses=1]
|
||||
%67 = xor i64 %66, 0 ; <i64> [#uses=1]
|
||||
%68 = load i64* %46, align 8 ; <i64> [#uses=1]
|
||||
%69 = load i64* null, align 8 ; <i64> [#uses=1]
|
||||
%70 = add i64 %68, 0 ; <i64> [#uses=1]
|
||||
%71 = add i64 %70, %67 ; <i64> [#uses=1]
|
||||
%72 = add i64 %71, %69 ; <i64> [#uses=1]
|
||||
%asmtmp427 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 18, i64 %57) nounwind ; <i64> [#uses=1]
|
||||
%asmtmp428 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %57) nounwind ; <i64> [#uses=1]
|
||||
%73 = xor i64 %asmtmp427, 0 ; <i64> [#uses=1]
|
||||
%74 = xor i64 %73, %asmtmp428 ; <i64> [#uses=1]
|
||||
%75 = and i64 %57, %42 ; <i64> [#uses=1]
|
||||
%not429 = xor i64 %57, -1 ; <i64> [#uses=1]
|
||||
%76 = and i64 %33, %not429 ; <i64> [#uses=1]
|
||||
%77 = xor i64 %75, %76 ; <i64> [#uses=1]
|
||||
%78 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
|
||||
%79 = load i64* %78, align 16 ; <i64> [#uses=1]
|
||||
%80 = add i64 %77, %20 ; <i64> [#uses=1]
|
||||
%81 = add i64 %80, %72 ; <i64> [#uses=1]
|
||||
%82 = add i64 %81, %74 ; <i64> [#uses=1]
|
||||
%83 = add i64 %82, %79 ; <i64> [#uses=1]
|
||||
%asmtmp432 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %59) nounwind ; <i64> [#uses=1]
|
||||
%asmtmp433 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %59) nounwind ; <i64> [#uses=1]
|
||||
%84 = xor i64 %asmtmp432, 0 ; <i64> [#uses=1]
|
||||
%85 = xor i64 %84, %asmtmp433 ; <i64> [#uses=1]
|
||||
%86 = add i64 %83, %22 ; <i64> [#uses=2]
|
||||
%87 = add i64 0, %85 ; <i64> [#uses=1]
|
||||
%asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind ; <i64> [#uses=1]
|
||||
%88 = xor i64 0, %asmtmp435 ; <i64> [#uses=1]
|
||||
%89 = load i64* null, align 8 ; <i64> [#uses=3]
|
||||
%asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind ; <i64> [#uses=1]
|
||||
%asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind ; <i64> [#uses=1]
|
||||
%90 = lshr i64 %89, 6 ; <i64> [#uses=1]
|
||||
%91 = xor i64 %asmtmp436, %90 ; <i64> [#uses=1]
|
||||
%92 = xor i64 %91, %asmtmp437 ; <i64> [#uses=1]
|
||||
%93 = load i64* %63, align 8 ; <i64> [#uses=1]
|
||||
%94 = load i64* null, align 8 ; <i64> [#uses=1]
|
||||
%95 = add i64 %93, %88 ; <i64> [#uses=1]
|
||||
%96 = add i64 %95, %92 ; <i64> [#uses=1]
|
||||
%97 = add i64 %96, %94 ; <i64> [#uses=2]
|
||||
store i64 %97, i64* %63, align 8
|
||||
%98 = and i64 %86, %57 ; <i64> [#uses=1]
|
||||
%not441 = xor i64 %86, -1 ; <i64> [#uses=1]
|
||||
%99 = and i64 %42, %not441 ; <i64> [#uses=1]
|
||||
%100 = xor i64 %98, %99 ; <i64> [#uses=1]
|
||||
%101 = add i64 %100, %33 ; <i64> [#uses=1]
|
||||
%102 = add i64 %101, %97 ; <i64> [#uses=1]
|
||||
%103 = add i64 %102, 0 ; <i64> [#uses=1]
|
||||
%104 = add i64 %103, 0 ; <i64> [#uses=1]
|
||||
%e.0 = add i64 %104, %35 ; <i64> [#uses=1]
|
||||
br label %bb349
|
||||
}
|
Loading…
Reference in New Issue