Make fast-isel work correctly with s/uadd.with.overflow intrinsics.

llvm-svn: 131420
This commit is contained in:
Eli Friedman 2011-05-16 21:06:17 +00:00
parent 4c08bb450a
commit a4d4a0162d
4 changed files with 37 additions and 84 deletions

View File

@ -310,7 +310,7 @@ protected:
/// the CFG.
void FastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL);
unsigned UpdateValueMap(const Value* I, unsigned Reg);
void UpdateValueMap(const Value* I, unsigned Reg, unsigned NumRegs = 1);
unsigned createResultReg(const TargetRegisterClass *RC);

View File

@ -235,10 +235,10 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
/// NOTE: This is only necessary because we might select a block that uses
/// a value before we select the block that defines the value. It might be
/// possible to fix this by selecting blocks in reverse postorder.
unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
if (!isa<Instruction>(I)) {
LocalValueMap[I] = Reg;
return Reg;
return;
}
unsigned &AssignedReg = FuncInfo.ValueMap[I];
@ -247,12 +247,11 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
AssignedReg = Reg;
else if (Reg != AssignedReg) {
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
FuncInfo.RegFixups[AssignedReg] = Reg;
for (unsigned i = 0; i < NumRegs; i++)
FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
AssignedReg = Reg;
}
return AssignedReg;
}
std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
@ -845,12 +844,13 @@ FastISel::SelectExtractValue(const User *U) {
if (!EVI)
return false;
// Make sure we only try to handle extracts with a legal result.
// Make sure we only try to handle extracts with a legal result. But also
// allow i1 because it's easy.
EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
if (!RealVT.isSimple())
return false;
MVT VT = RealVT.getSimpleVT();
if (!TLI.isTypeLegal(VT))
if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
return false;
const Value *Op0 = EVI->getOperand(0);

View File

@ -1008,63 +1008,6 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
} else if (ExtractValueInst *EI =
dyn_cast<ExtractValueInst>(BI->getCondition())) {
// Check to see if the branch instruction is from an "arithmetic with
// overflow" intrinsic. The main way these intrinsics are used is:
//
// %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
// %sum = extractvalue { i32, i1 } %t, 0
// %obit = extractvalue { i32, i1 } %t, 1
// br i1 %obit, label %overflow, label %normal
//
// The %sum and %obit are converted in an ADD and a SETO/SETB before
// reaching the branch. Therefore, we search backwards through the MBB
// looking for the SETO/SETB instruction. If an instruction modifies the
// EFLAGS register before we reach the SETO/SETB instruction, then we can't
// convert the branch into a JO/JB instruction.
if (const IntrinsicInst *CI =
dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
const MachineInstr *SetMI = 0;
unsigned Reg = getRegForValue(EI);
for (MachineBasicBlock::const_reverse_iterator
RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend();
RI != RE; ++RI) {
const MachineInstr &MI = *RI;
if (MI.definesRegister(Reg)) {
if (MI.isCopy()) {
Reg = MI.getOperand(1).getReg();
continue;
}
SetMI = &MI;
break;
}
const TargetInstrDesc &TID = MI.getDesc();
if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
MI.hasUnmodeledSideEffects())
break;
}
if (SetMI) {
unsigned OpCode = SetMI->getOpcode();
if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DL);
FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
}
}
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
// Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
// typically happen for _Bool and C++ bools.
@ -1391,10 +1334,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Should fold immediates.
// Replace "add with overflow" intrinsics with an "add" instruction followed
// by a seto/setc instruction. Later on, when the "extractvalue"
// instructions are encountered, we use the fact that two registers were
// created sequentially to get the correct registers for the "sum" and the
// "overflow bit".
// by a seto/setc instruction.
const Function *Callee = I.getCalledFunction();
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
@ -1420,27 +1360,18 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
else
return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
// The call to CreateRegs builds two sequential registers, to store
// both of the returned values.
unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
.addReg(Reg1).addReg(Reg2);
unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
// If the add with overflow is an intra-block value then we just want to
// create temporaries for it like normal. If it is a cross-block value then
// UpdateValueMap will return the cross-block register used. Since we
// *really* want the value to be live in the register pair known by
// UpdateValueMap, we have to use DestReg1+1 as the destination register in
// the cross block case. In the non-cross-block case, we should just make
// another register for the value.
if (DestReg1 != ResultReg)
ResultReg = DestReg1+1;
else
ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
UpdateValueMap(&I, ResultReg, 2);
return true;
}
}

View File

@ -1,6 +1,7 @@
; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 | FileCheck %s
%struct.x = type { i64, i64 }
%addovf = type { i32, i1 }
declare %struct.x @f()
define void @test1(i64*) nounwind ssp {
@ -24,3 +25,24 @@ define void @test2(i64*) nounwind ssp {
; CHECK: callq _f
; CHECK-NEXT: addq $10, %rdx
}
declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
; test3: exercises llvm.sadd.with.overflow.i32 when both fields of its
; { i32, i1 } result are consumed through extractvalue. The sum (field 0)
; feeds a multiply whose result is stored only in the %then block, and the
; overflow bit (field 1) is used directly as the branch condition.
define void @test3(i32 %x, i32 %y, i32* %z) {
%r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
%sum = extractvalue %addovf %r, 0
%sum3 = mul i32 %sum, 3
%bit = extractvalue %addovf %r, 1
br i1 %bit, label %then, label %end
then:
store i32 %sum3, i32* %z
br label %end
end:
ret void
; The expectations below require, in order: an addl, a seto that writes the
; overflow bit into %al, and a testb of the low bit of %al for the branch.
; CHECK: test3
; CHECK: addl
; CHECK: seto %al
; CHECK: testb $1, %al
}