forked from OSchip/llvm-project
Make fast-isel work correctly s/uadd.with.overflow intrinsics.
llvm-svn: 131420
This commit is contained in:
parent
4c08bb450a
commit
a4d4a0162d
|
@ -310,7 +310,7 @@ protected:
|
|||
/// the CFG.
|
||||
void FastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL);
|
||||
|
||||
unsigned UpdateValueMap(const Value* I, unsigned Reg);
|
||||
void UpdateValueMap(const Value* I, unsigned Reg, unsigned NumRegs = 1);
|
||||
|
||||
unsigned createResultReg(const TargetRegisterClass *RC);
|
||||
|
||||
|
|
|
@ -235,10 +235,10 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
|
|||
/// NOTE: This is only necessary because we might select a block that uses
|
||||
/// a value before we select the block that defines the value. It might be
|
||||
/// possible to fix this by selecting blocks in reverse postorder.
|
||||
unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
|
||||
void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
|
||||
if (!isa<Instruction>(I)) {
|
||||
LocalValueMap[I] = Reg;
|
||||
return Reg;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned &AssignedReg = FuncInfo.ValueMap[I];
|
||||
|
@ -247,12 +247,11 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
|
|||
AssignedReg = Reg;
|
||||
else if (Reg != AssignedReg) {
|
||||
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
|
||||
FuncInfo.RegFixups[AssignedReg] = Reg;
|
||||
for (unsigned i = 0; i < NumRegs; i++)
|
||||
FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
|
||||
|
||||
AssignedReg = Reg;
|
||||
}
|
||||
|
||||
return AssignedReg;
|
||||
}
|
||||
|
||||
std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
|
||||
|
@ -845,12 +844,13 @@ FastISel::SelectExtractValue(const User *U) {
|
|||
if (!EVI)
|
||||
return false;
|
||||
|
||||
// Make sure we only try to handle extracts with a legal result.
|
||||
// Make sure we only try to handle extracts with a legal result. But also
|
||||
// allow i1 because it's easy.
|
||||
EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
|
||||
if (!RealVT.isSimple())
|
||||
return false;
|
||||
MVT VT = RealVT.getSimpleVT();
|
||||
if (!TLI.isTypeLegal(VT))
|
||||
if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
|
||||
return false;
|
||||
|
||||
const Value *Op0 = EVI->getOperand(0);
|
||||
|
|
|
@ -1008,63 +1008,6 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
|
|||
FuncInfo.MBB->addSuccessor(TrueMBB);
|
||||
return true;
|
||||
}
|
||||
} else if (ExtractValueInst *EI =
|
||||
dyn_cast<ExtractValueInst>(BI->getCondition())) {
|
||||
// Check to see if the branch instruction is from an "arithmetic with
|
||||
// overflow" intrinsic. The main way these intrinsics are used is:
|
||||
//
|
||||
// %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
|
||||
// %sum = extractvalue { i32, i1 } %t, 0
|
||||
// %obit = extractvalue { i32, i1 } %t, 1
|
||||
// br i1 %obit, label %overflow, label %normal
|
||||
//
|
||||
// The %sum and %obit are converted in an ADD and a SETO/SETB before
|
||||
// reaching the branch. Therefore, we search backwards through the MBB
|
||||
// looking for the SETO/SETB instruction. If an instruction modifies the
|
||||
// EFLAGS register before we reach the SETO/SETB instruction, then we can't
|
||||
// convert the branch into a JO/JB instruction.
|
||||
if (const IntrinsicInst *CI =
|
||||
dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
|
||||
if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
|
||||
CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
|
||||
const MachineInstr *SetMI = 0;
|
||||
unsigned Reg = getRegForValue(EI);
|
||||
|
||||
for (MachineBasicBlock::const_reverse_iterator
|
||||
RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend();
|
||||
RI != RE; ++RI) {
|
||||
const MachineInstr &MI = *RI;
|
||||
|
||||
if (MI.definesRegister(Reg)) {
|
||||
if (MI.isCopy()) {
|
||||
Reg = MI.getOperand(1).getReg();
|
||||
continue;
|
||||
}
|
||||
|
||||
SetMI = &MI;
|
||||
break;
|
||||
}
|
||||
|
||||
const TargetInstrDesc &TID = MI.getDesc();
|
||||
if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
|
||||
MI.hasUnmodeledSideEffects())
|
||||
break;
|
||||
}
|
||||
|
||||
if (SetMI) {
|
||||
unsigned OpCode = SetMI->getOpcode();
|
||||
|
||||
if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
|
||||
TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4))
|
||||
.addMBB(TrueMBB);
|
||||
FastEmitBranch(FalseMBB, DL);
|
||||
FuncInfo.MBB->addSuccessor(TrueMBB);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
|
||||
// Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
|
||||
// typically happen for _Bool and C++ bools.
|
||||
|
@ -1391,10 +1334,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
|||
// FIXME: Should fold immediates.
|
||||
|
||||
// Replace "add with overflow" intrinsics with an "add" instruction followed
|
||||
// by a seto/setc instruction. Later on, when the "extractvalue"
|
||||
// instructions are encountered, we use the fact that two registers were
|
||||
// created sequentially to get the correct registers for the "sum" and the
|
||||
// "overflow bit".
|
||||
// by a seto/setc instruction.
|
||||
const Function *Callee = I.getCalledFunction();
|
||||
const Type *RetTy =
|
||||
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
|
||||
|
@ -1420,27 +1360,18 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
|||
else
|
||||
return false;
|
||||
|
||||
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
|
||||
// The call to CreateRegs builds two sequential registers, to store the
|
||||
// both the the returned values.
|
||||
unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
|
||||
.addReg(Reg1).addReg(Reg2);
|
||||
unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
|
||||
|
||||
// If the add with overflow is an intra-block value then we just want to
|
||||
// create temporaries for it like normal. If it is a cross-block value then
|
||||
// UpdateValueMap will return the cross-block register used. Since we
|
||||
// *really* want the value to be live in the register pair known by
|
||||
// UpdateValueMap, we have to use DestReg1+1 as the destination register in
|
||||
// the cross block case. In the non-cross-block case, we should just make
|
||||
// another register for the value.
|
||||
if (DestReg1 != ResultReg)
|
||||
ResultReg = DestReg1+1;
|
||||
else
|
||||
ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
|
||||
|
||||
unsigned Opc = X86::SETBr;
|
||||
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
|
||||
Opc = X86::SETOr;
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
|
||||
|
||||
UpdateValueMap(&I, ResultReg, 2);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 | FileCheck %s
|
||||
|
||||
%struct.x = type { i64, i64 }
|
||||
%addovf = type { i32, i1 }
|
||||
declare %struct.x @f()
|
||||
|
||||
define void @test1(i64*) nounwind ssp {
|
||||
|
@ -24,3 +25,24 @@ define void @test2(i64*) nounwind ssp {
|
|||
; CHECK: callq _f
|
||||
; CHECK-NEXT: addq $10, %rdx
|
||||
}
|
||||
|
||||
declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
|
||||
|
||||
define void @test3(i32 %x, i32 %y, i32* %z) {
|
||||
%r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
|
||||
%sum = extractvalue %addovf %r, 0
|
||||
%sum3 = mul i32 %sum, 3
|
||||
%bit = extractvalue %addovf %r, 1
|
||||
br i1 %bit, label %then, label %end
|
||||
|
||||
then:
|
||||
store i32 %sum3, i32* %z
|
||||
br label %end
|
||||
|
||||
end:
|
||||
ret void
|
||||
; CHECK: test3
|
||||
; CHECK: addl
|
||||
; CHECK: seto %al
|
||||
; CHECK: testb $1, %al
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue